Skip to content
Snippets Groups Projects
Commit 6347e9bf authored by Marco Biasini's avatar Marco Biasini
Browse files

move the bulk of PDBize to C++

parent 9938205c
No related branches found
No related tags found
No related merge requests found
...@@ -18,10 +18,12 @@ ...@@ -18,10 +18,12 @@
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#define BOOST_PYTHON_MAX_ARITY 20 #define BOOST_PYTHON_MAX_ARITY 20
#include <boost/python.hpp> #include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <boost/python/slice.hpp> #include <boost/python/slice.hpp>
using namespace boost::python; using namespace boost::python;
#include <ost/geom/geom.hh> #include <ost/geom/geom.hh>
#include <ost/geom/export_helper/vector.hh>
using namespace geom; using namespace geom;
...@@ -138,4 +140,9 @@ void export_Mat4() ...@@ -138,4 +140,9 @@ void export_Mat4()
.def("PasteTranslation",&Mat4::PasteTranslation) .def("PasteTranslation",&Mat4::PasteTranslation)
.add_property("data",mat4_data) .add_property("data",mat4_data)
; ;
class_<Mat4List>("Mat4List", init<>())
.def(vector_indexing_suite<Mat4List>())
.def(geom::VectorAdditions<Mat4List>())
;
} }
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <cassert> #include <cassert>
#include <cstddef> // for size_t #include <cstddef> // for size_t
#include <ostream> #include <ostream>
#include <vector>
#include <boost/operators.hpp> #include <boost/operators.hpp>
...@@ -136,6 +137,8 @@ double i32, double i33); ...@@ -136,6 +137,8 @@ double i32, double i33);
}; };
typedef std::vector<Mat4> Mat4List;
DLLEXPORT_OST_GEOM std::ostream& operator<<(std::ostream& os, const Mat4& m); DLLEXPORT_OST_GEOM std::ostream& operator<<(std::ostream& os, const Mat4& m);
} // ns geom } // ns geom
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
import os, tempfile, ftplib, httplib import os, tempfile, ftplib, httplib
from _ost_io import * from _ost_io import *
from ost import mol, geom, conop from ost import mol, geom, conop, seq
profiles=None profiles=None
...@@ -358,29 +358,12 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non ...@@ -358,29 +358,12 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non
# MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX. # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
transformation=False): transformation=False):
def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()):
atom_pos_wrong = False
for atom in src_res.atoms:
tmp_pos = geom.Vec4(atom.pos)
new_atom=edi.InsertAtom(dst_res, atom.name, geom.Vec3(trans*tmp_pos),
element=atom.element,
occupancy=atom.occupancy,
b_factor=atom.b_factor,
is_hetatm=atom.is_hetatom)
for p in range(0,3):
if new_atom.pos[p] <= -1000:
atom_pos_wrong = True
elif new_atom.pos[p] >= 10000:
atom_pos_wrong = True
return atom_pos_wrong
chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
# create list of operations # create list of operations
# for cartesian products, operations are stored in a list, multiplied with # for cartesian products, operations are stored in a list, multiplied with
# the next list of operations and re-stored... until all lists of operations # the next list of operations and re-stored... until all lists of operations
# are multiplied in an all-against-all manner. # are multiplied in an all-against-all manner.
operations = biounit.GetOperations() operations = biounit.GetOperations()
trans_matrices = list() trans_matrices = geom.Mat4List()
if len(operations) > 0: if len(operations) > 0:
for op in operations[0]: for op in operations[0]:
rot = geom.Mat4() rot = geom.Mat4()
...@@ -407,86 +390,14 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10, ...@@ -407,86 +390,14 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
assu = asu.Select('cname=' + ','.join(biounit.GetChainList())) assu = asu.Select('cname=' + ','.join(biounit.GetChainList()))
# use each transformation on the view, store as entity and transform, PDBize # use each transformation on the view, store as entity and transform, PDBize
# the result while adding everything to one large entity # the result while adding everything to one large entity
pdb_bu = mol.CreateEntity() ss = seqres
edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT) if not ss:
cur_chain_name = 0 ss = seq.SequenceList()
water_chain = mol.ChainHandle()
ligand_chain = mol.ChainHandle() pdb_bu = mol.alg.PDBize(assu, trans_matrices, ss, min_polymer_size=min_polymer_size,
a_pos_wrong = False shift_to_fit=transformation)
for tr in trans_matrices:
# do a PDBize, add each new entity to the end product
for chain in assu.chains:
residue_count = len(chain.residues)
if seqres:
seqres_chain = seqres.FindSequence(chain.name)
if seqres_chain.IsValid():
residue_count = len(seqres_chain)
if chain.is_polymer and residue_count >= min_polymer_size:
if len(chain_names) == cur_chain_name:
raise RuntimeError('Running out of chain names')
new_chain = edi.InsertChain(chain_names[cur_chain_name])
cur_chain_name += 1
edi.SetChainDescription(new_chain, chain.description)
edi.SetChainType(new_chain, chain.type)
new_chain.SetStringProp('original_name', chain.name)
if chain.HasProp("pdb_auth_chain_name"):
new_chain.SetStringProp("pdb_auth_chain_name",
chain.GetStringProp("pdb_auth_chain_name"))
for res in chain.residues:
new_res = edi.AppendResidue(new_chain, res.name, res.number)
a_b = _CopyAtoms(res, new_res, edi, tr)
if not a_pos_wrong:
a_pos_wrong = a_b
elif chain.type == mol.CHAINTYPE_WATER:
if not water_chain.IsValid():
# water gets '-' as name
water_chain = edi.InsertChain('-')
edi.SetChainDescription(water_chain, chain.description)
edi.SetChainType(water_chain, chain.type)
for res in chain.residues:
new_res = edi.AppendResidue(water_chain, res.name)
new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
new_res.SetStringProp('description', chain.description)
a_b = _CopyAtoms(res, new_res, edi, tr)
if not a_pos_wrong:
a_pos_wrong = a_b
else:
if not ligand_chain.IsValid():
# all ligands, put in one chain, are named '_'
ligand_chain = edi.InsertChain('_')
last_rnum = 0
else:
last_rnum = ligand_chain.residues[-1].number.num
residues=chain.residues
ins_code='\0'
if len(residues)>1:
ins_code='A'
for res in chain.residues:
new_res = edi.AppendResidue(ligand_chain, res.name,
mol.ResNum(last_rnum+1, ins_code))
new_res.SetStringProp('description', chain.description)
new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
new_res.SetStringProp("original_name", chain.name)
if chain.HasProp("pdb_auth_chain_name"):
new_res.SetStringProp("pdb_auth_chain_name",
chain.GetStringProp("pdb_auth_chain_name"))
ins_code = chr(ord(ins_code)+1)
_CopyAtoms(res, new_res, edi, tr)
a_b = _CopyAtoms(res, new_res, edi, tr)
if not a_pos_wrong:
a_pos_wrong = a_b
move_to_origin = None
if a_pos_wrong:
start = pdb_bu.bounds.min
move_to_origin = geom.Mat4(1,0,0,(-999 - start[0]),
0,1,0,(-999 - start[1]),
0,0,1,(-999 - start[2]),
0,0,0,1)
edi = pdb_bu.EditXCS(mol.UNBUFFERED_EDIT)
edi.ApplyTransform(move_to_origin)
conop.ConnectAll(pdb_bu)
if transformation: if transformation:
return pdb_bu, move_to_origin return pdb_bu, pdb_bu.GetTransformationMatrix()
return pdb_bu return pdb_bu
MMCifInfoBioUnit.PDBize = _PDBize MMCifInfoBioUnit.PDBize = _PDBize
......
...@@ -171,9 +171,9 @@ class TestMMCifInfo(unittest.TestCase): ...@@ -171,9 +171,9 @@ class TestMMCifInfo(unittest.TestCase):
seqres=True, seqres=True,
info=True) info=True)
pdb_ent, t = info.GetBioUnits()[0].PDBize(ent, transformation=True) pdb_ent, t = info.GetBioUnits()[0].PDBize(ent, transformation=True)
self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[0], -915.759, 2) self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[0], -915.759, 1)
self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[1], -952.345, 2) self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[1], -952.345, 1)
self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.75, 2) self.assertAlmostEquals(pdb_ent.GetCenterOfAtoms()[2], 3221.75, 1)
self.assertEquals(geom.Equal(t, self.assertEquals(geom.Equal(t,
geom.Mat4(1,0,0,-920.462, geom.Mat4(1,0,0,-920.462,
0,1,0,-966.654, 0,1,0,-966.654,
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <ost/mol/alg/superpose_frames.hh> #include <ost/mol/alg/superpose_frames.hh>
#include <ost/mol/alg/filter_clashes.hh> #include <ost/mol/alg/filter_clashes.hh>
#include <ost/mol/alg/consistency_checks.hh> #include <ost/mol/alg/consistency_checks.hh>
#include <ost/mol/alg/pdbize.hh>
#include <ost/export_helper/pair_to_tuple_conv.hh> #include <ost/export_helper/pair_to_tuple_conv.hh>
using namespace boost::python; using namespace boost::python;
...@@ -168,6 +169,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg) ...@@ -168,6 +169,8 @@ BOOST_PYTHON_MODULE(_ost_mol_alg)
def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap); def("PrintResidueRDMap",&mol::alg::PrintResidueRDMap);
def("ResidueNamesMatch",&mol::alg::ResidueNamesMatch); def("ResidueNamesMatch",&mol::alg::ResidueNamesMatch);
def("PDBize", &mol::alg::PDBize, (arg("asu"), arg("transformations"), arg("seqres"),
arg("min_polymer_size")=10, arg("shift_to_fit")=false));
} }
...@@ -10,6 +10,7 @@ set(OST_MOL_ALG_HEADERS ...@@ -10,6 +10,7 @@ set(OST_MOL_ALG_HEADERS
trajectory_analysis.hh trajectory_analysis.hh
structure_analysis.hh structure_analysis.hh
consistency_checks.hh consistency_checks.hh
pdbize.hh
) )
set(OST_MOL_ALG_SOURCES set(OST_MOL_ALG_SOURCES
...@@ -23,6 +24,7 @@ set(OST_MOL_ALG_SOURCES ...@@ -23,6 +24,7 @@ set(OST_MOL_ALG_SOURCES
trajectory_analysis.cc trajectory_analysis.cc
structure_analysis.cc structure_analysis.cc
consistency_checks.cc consistency_checks.cc
pdbize.cc
) )
set(MOL_ALG_DEPS ost_mol ost_seq) set(MOL_ALG_DEPS ost_mol ost_seq)
......
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#include "pdbize.hh"
#include <ost/mol/residue_handle.hh>
#include <ost/mol/residue_view.hh>
#include <ost/mol/atom_handle.hh>
#include <ost/mol/atom_view.hh>
#include <ost/mol/transfer_connectivity.hh>
#include <ost/mol/chain_handle.hh>
#include <ost/mol/chain_view.hh>
#include <ost/mol/xcs_editor.hh>
#include <ost/seq/sequence_handle.hh>
namespace ost { namespace mol { namespace alg {
const char* POLYPEPTIDE_CHAIN_NAMES="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz";
const char* LIGAND_CHAIN_NAME="_";
const char* WATER_CHAIN_NAME="-";
namespace {
bool copy_atoms(ResidueView src_res, ResidueHandle dst_res, XCSEditor& edi,
const geom::Mat4 transform) {
bool needs_adjustment = false;
for (AtomViewList::const_iterator
i = src_res.GetAtomList().begin(),
e = src_res.GetAtomList().end(); i != e; ++i) {
AtomHandle new_atom = edi.InsertAtom(dst_res, i->GetName(),
geom::Vec3(transform*i->GetPos()),
i->GetName(), i->GetOccupancy(),
i->GetBFactor(), i->IsHetAtom());
geom::Vec3 pos = new_atom.GetPos();
for (int j = 0; j<3; ++j) {
needs_adjustment|= pos[j]<=-1000 || pos[j]>=10000;
}
}
return needs_adjustment;
}
}
EntityHandle PDBize(EntityView asu, const geom::Mat4List& transforms,
seq::SequenceList seqres, int min_polymer_size,
bool shift_to_fit)
{
EntityHandle pdb_bu = CreateEntity();
XCSEditor edi = pdb_bu.EditXCS(BUFFERED_EDIT);
ChainHandle ligand_chain;
ChainHandle water_chain;
const char* curr_chain_name = POLYPEPTIDE_CHAIN_NAMES;
bool needs_adjustment = false;
int last_rnum = 0;
std::map<ResidueHandle, ResidueHandle> dst_to_src_map;
for (geom::Mat4List::const_iterator i = transforms.begin(),
e = transforms.end(); i != e; ++i) {
for (ChainViewList::const_iterator j = asu.GetChainList().begin(),
e2 =asu.GetChainList().end(); j != e2; ++j) {
ChainView chain = *j;
int chain_length = chain.GetResidueCount();
if (chain_length < min_polymer_size && seqres.IsValid()) {
seq::SequenceHandle s = seqres.FindSequence(chain.GetName());
if (s.IsValid())
chain_length = s.GetLength();
}
if (chain.IsPolymer() && chain_length >= min_polymer_size) {
if (*curr_chain_name == 0) {
throw std::runtime_error("running out of chain names");
}
ChainHandle new_chain = edi.InsertChain(String(1, curr_chain_name[0]));
curr_chain_name++;
edi.SetChainDescription(new_chain, chain.GetDescription());
edi.SetChainType(new_chain, chain.GetType());
new_chain.SetStringProp("original_name", chain.GetName());
if (chain.HasProp("pdb_auth_chain_name"))
new_chain.SetStringProp("pdb_auth_chain_name",
chain.GetStringProp("pdb_auth_chain_name"));
for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(),
e3 = chain.GetResidueList().end(); k != e3; ++k) {
ResidueHandle new_res = edi.AppendResidue(new_chain, k->GetName(),
k->GetNumber());
dst_to_src_map[new_res] = k->GetHandle();
needs_adjustment |= copy_atoms(*k, new_res, edi, *i);
}
continue;
}
if (chain.GetType() == CHAINTYPE_WATER) {
if (!water_chain) {
water_chain = edi.InsertChain(WATER_CHAIN_NAME);
edi.SetChainDescription(water_chain, chain.GetDescription());
edi.SetChainType(water_chain, chain.GetType());
}
for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(),
e3 = chain.GetResidueList().end(); k != e3; ++k) {
ResidueHandle new_res = edi.AppendResidue(water_chain, k->GetName());
new_res.SetStringProp("type", StringFromChainType(chain.GetType()));
new_res.SetStringProp("description", chain.GetDescription());
dst_to_src_map[new_res] = k->GetHandle();
needs_adjustment |= copy_atoms(*k, new_res, edi, *i);
}
continue;
}
// deal with ligands...
if (!ligand_chain) {
ligand_chain = edi.InsertChain(LIGAND_CHAIN_NAME);
last_rnum = 0;
}
char ins_code = chain.GetResidueCount()>1 ? 'A' : '\0';
for (ResidueViewList::const_iterator k = chain.GetResidueList().begin(),
e3 = chain.GetResidueList().end(); k != e3; ++k) {
ResidueHandle new_res = edi.AppendResidue(ligand_chain, k->GetName(),
ResNum(last_rnum+1, ins_code));
new_res.SetStringProp("type", StringFromChainType(chain.GetType()));
new_res.SetStringProp("description", chain.GetDescription());
new_res.SetStringProp("original_name", chain.GetName());
if (chain.HasProp("pdb_auth_chain_name"))
new_res.SetStringProp("pdb_auth_chain_name",
chain.GetStringProp("pdb_auth_chain_name"));
dst_to_src_map[new_res] = k->GetHandle();
ins_code += 1;
needs_adjustment |= copy_atoms(*k, new_res, edi, *i);
}
last_rnum+=1;
}
}
if (needs_adjustment && shift_to_fit) {
geom::Vec3 min_coord = pdb_bu.GetBounds().GetMin();
geom::Mat4 tf(1, 0, 0, -999 - min_coord[0],
0, 1, 0, -999 - min_coord[1],
0, 0, 1, -999 - min_coord[2],
0, 0, 0, 1);
edi.ApplyTransform(tf);
}
TransferConnectivity(pdb_bu, dst_to_src_map);
return pdb_bu;
}
}}}
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2011 by the OpenStructure authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
#ifndef OST_MOL_ALG_PDBIZE_HH
#define OST_MOL_ALG_PDBIZE_HH
#include <ost/mol/entity_view.hh>
#include <ost/mol/entity_handle.hh>
#include <ost/seq/sequence_list.hh>
#include "module_config.hh"
namespace ost { namespace mol { namespace alg {
extern const char* POLYPEPTIDE_CHAIN_NAMES;
extern const char* LIGAND_CHAIN_NAME;
extern const char* WATER_CHAIN_NAME;
mol::EntityHandle DLLEXPORT_OST_MOL_ALG
PDBize(mol::EntityView asu, const geom::Mat4List& transforms, seq::SequenceList seqres,
int min_polymer_size=10, bool shift_to_fit=true);
}}}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment