From 7d133a23ba9c14de2c913786cc331d376e36758b Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Tue, 6 Nov 2012 11:44:42 +0100
Subject: [PATCH] Fixed problem of large bio units in PDBize

---
 modules/io/doc/mmcif.rst     | 10 ++++++++--
 modules/io/pymod/__init__.py | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst
index 5d96ba6b8..ccdad63ed 100644
--- a/modules/io/doc/mmcif.rst
+++ b/modules/io/doc/mmcif.rst
@@ -541,7 +541,7 @@ of the annotation available.
 
     See :attr:`operations`
 
-.. function:: PDBize(asu, seqres=None, min_polymer_size=10)
+.. function:: PDBize(asu, seqres=None, min_polymer_size=10, transformation=False)
 
     Returns the biological assembly (bio unit) for an entity. The new entity
     created is well suited to be saved as a PDB file. Therefore the function
@@ -556,6 +556,9 @@ of the annotation available.
       - ligands which resemble a polymer but have less than min_polymer_size
         residues are assigned the same numeric residue number. The residues are
         distinguished by insertion code.
+      - sometimes bio units exceed the coordinate range storable in a PDB file.
+        In that case, the bio unit is moved so that the lower left corner of
+        its bounding box lies at (-999, -999, -999).
 
     Since this function is at the moment mainly used to create biounits from
     mmCIF files to be saved as PDBs, the function assumes that the
@@ -573,6 +576,9 @@ of the annotation available.
       get its own chain. Everything below that number will be sorted into the 
       ligand chain.
     :type min_polymer_size: int
+    :param transformation:  If set, return a tuple of the bio unit and the
+      matrix used to move it to the lower left corner (None if not moved).
+    :type transformation: :class:`bool`
 
 .. class:: MMCifInfoStructDetails
 
@@ -942,4 +948,4 @@ of the annotation available.
 ..  LocalWords:  biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym
 ..  LocalWords:  auth GetMMCifPDBChainTr AddPDBCMMCifhainTr GetPDBMMCifChainTr
 ..  LocalWords:  GetRevisions AddRevision SetRevisionsDateOriginal GetSize
-..  LocalWords:  GetNum num GetStatus GetLastDate GetFirstRelease
+..  LocalWords:  GetNum num GetStatus GetLastDate GetFirstRelease storable
diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py
index 71ea2ec6c..44bda643a 100644
--- a/modules/io/pymod/__init__.py
+++ b/modules/io/pymod/__init__.py
@@ -361,8 +361,10 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non
 # arguement is the usual 'self'.
 # documentation for this function was moved to mmcif.rst,
 # MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
-def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
+def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
+            transformation=False):
   def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()):
+    atom_pos_wrong = False
     for atom in src_res.atoms:
       tmp_pos = geom.Vec4(atom.pos)
       new_atom=edi.InsertAtom(dst_res, atom.name, geom.Vec3(trans*tmp_pos), 
@@ -370,6 +372,12 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
                               occupancy=atom.occupancy, 
                               b_factor=atom.b_factor,
                               is_hetatm=atom.is_hetatom)
+      for p in range(0,3):
+        if new_atom.pos[p] <= -1000:
+          atom_pos_wrong = True
+        elif new_atom.pos[p] >= 10000:
+          atom_pos_wrong = True
+    return atom_pos_wrong
 
   chain_names='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
   # create list of operations
@@ -409,6 +417,7 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
   cur_chain_name = 0
   water_chain = mol.ChainHandle()
   ligand_chain = mol.ChainHandle()
+  a_pos_wrong = False
   for tr in trans_matrices:
     # do a PDBize, add each new entity to the end product
     for chain in assu.chains:
@@ -430,7 +439,9 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
                                   chain.GetStringProp("pdb_auth_chain_name"))
         for res in chain.residues:
           new_res = edi.AppendResidue(new_chain, res.name, res.number)
-          _CopyAtoms(res, new_res, edi, tr)
+          a_b = _CopyAtoms(res, new_res, edi, tr)
+          if not a_pos_wrong:
+            a_pos_wrong = a_b
       elif chain.type == mol.CHAINTYPE_WATER:
         if not water_chain.IsValid():
           # water gets '-' as name
@@ -441,7 +452,9 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
           new_res = edi.AppendResidue(water_chain, res.name)
           new_res.SetStringProp('type', mol.StringFromChainType(chain.type))
           new_res.SetStringProp('description', chain.description)
-          _CopyAtoms(res, new_res, edi, tr)
+          a_b = _CopyAtoms(res, new_res, edi, tr)
+          if not a_pos_wrong:
+            a_pos_wrong = a_b
       else:
         if not ligand_chain.IsValid():
           # all ligands, put in one chain, are named '_'
@@ -463,8 +476,22 @@ def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
             new_res.SetStringProp("pdb_auth_chain_name",
                                   chain.GetStringProp("pdb_auth_chain_name"))
           ins_code = chr(ord(ins_code)+1)
-          _CopyAtoms(res, new_res, edi, tr)
+          a_b = _CopyAtoms(res, new_res, edi, tr)
+          if not a_pos_wrong:
+            a_pos_wrong = a_b
+  move_to_origin = None
+  if a_pos_wrong:
+    print "IN"
+    start = pdb_bu.bounds.min
+    move_to_origin = geom.Mat4(1,0,0,(-999 - start[0]),
+                               0,1,0,(-999 - start[1]),
+                               0,0,1,(-999 - start[2]),
+                               0,0,0,1)
+    edi = pdb_bu.EditXCS(mol.UNBUFFERED_EDIT)
+    edi.ApplyTransform(move_to_origin)
   conop.ConnectAll(pdb_bu)
+  if transformation:
+    return pdb_bu, move_to_origin
   return pdb_bu
 
 MMCifInfoBioUnit.PDBize = _PDBize
-- 
GitLab