From 529ccde9e92097f226e4aa3e40ac4113ff7b24ee Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Tue, 2 Jan 2024 13:01:21 +0100
Subject: [PATCH] biounits: naming conflicts of AU chains with identity
 transform applied

Chain naming in biounit construction reserves 1.<au_cname> for the
respective AU chain with identity transform applied. As of January
2024, there are 3 PDB entries (8qn6, 8x1h, 2c0x) where the same AU
chain with identity transform occurs several times in the same
biounit. This is likely an error in the respective mmCIF files as
the resulting chains sit on top of each other. OST just names the
FIRST occurrence as 1.<au_cname>.

A warning for that behaviour is also added to the documentation
---
 modules/mol/alg/doc/molalg.rst | 10 ++++++++++
 modules/mol/alg/src/biounit.cc | 19 ++++++++++---------
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst
index 366b1cb83..6cb57ea40 100644
--- a/modules/mol/alg/doc/molalg.rst
+++ b/modules/mol/alg/doc/molalg.rst
@@ -1474,6 +1474,16 @@ from a :class:`ost.io.MMCifInfoBioUnit` or the derived
   is reserved for the original AU chain with identity transform (read: no
   transform) applied. If a certain AU chain only occurs with an actual
   transform applied, numbering starts at 2.
+  
+  .. warning::
+    There is the (rare) possibility that an AU chain that has only identity
+    transform applied is not named 1.<au_cname>.
+    As of January 2024, there are 3 PDB entries (8qn6, 8x1h, 2c0x) where
+    the same AU chain with identity transform occurs several times in the same
+    biounit. This is likely an error in the respective mmCIF files as the
+    resulting chains sit on top of each other. OST just names the FIRST
+    occurrence as 1.<au_cname>.
+    
 
   :param asu: The assymetric unit
   :type asu: :class:`ost.mol.EntityHandle`
diff --git a/modules/mol/alg/src/biounit.cc b/modules/mol/alg/src/biounit.cc
index 74efb5299..e5fd1644f 100644
--- a/modules/mol/alg/src/biounit.cc
+++ b/modules/mol/alg/src/biounit.cc
@@ -265,7 +265,6 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu,
     // process all transformations
     for(uint t_idx = 0; t_idx < transforms[chain_intvl].size(); ++t_idx) {
       const geom::Mat4& m = transforms[chain_intvl][t_idx];
-
       // check if m is identity matrix => no transformation applied
       bool is_identity = true;
       geom::Mat4 identity_matrix = geom::Mat4::Identity();
@@ -285,16 +284,18 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu,
         String au_cname = au_chains[chain_intvl][c_idx];
 
         std::stringstream bu_cname_ss;
-        if(is_identity) {
-          if(au_chain_copies.find(au_cname) != au_chain_copies.end()) {
-            std::stringstream err;
-            err<<"Try to insert copy of AU chain "<<au_cname<<" with identity ";
-            err<<"transform, i.e. copy the raw coordinates. This has already ";
-            err<<"been done for this AU chain and there can only be one.";
-            throw ost::Error(err.str());
-          }
+        if(is_identity && au_chain_copies.find(au_cname) == au_chain_copies.end()) {
           bu_cname_ss << "1." << au_cname; // 1.<au_cname> reserved for AU chain
                                            // without transformation
+                                           // at least the first of it...
+                                           // as of January 2024, there were 3
+                                           // entries (8qn6, 8x1h, 2c0x) where
+                                           // the identity transform is applied
+                                           // more than once on the same AU
+                                           // chain, effectively leading to
+                                           // chains sitting on top of each
+                                           // other, i.e. garbage in, garbage
+                                           // out
           au_chain_copies.insert(au_cname);
         } else {
           if(chain_counter.find(au_cname) == chain_counter.end()) {
-- 
GitLab