From 3544f890c21982b7dfa60f047fc1f8431ff09272 Mon Sep 17 00:00:00 2001
From: B13nch3n <stefan.bienert@me.com>
Date: Fri, 31 Jul 2020 06:46:39 +0200
Subject: [PATCH] Fixed PDBize to restore bonds in branched entities correctly.

---
 modules/mol/base/src/transfer_connectivity.cc |  76 ++++++++-
 .../base/tests/test_transfer_connectivity.cc  | 144 ++++++++++++++++++
 2 files changed, 217 insertions(+), 3 deletions(-)

diff --git a/modules/mol/base/src/transfer_connectivity.cc b/modules/mol/base/src/transfer_connectivity.cc
index b7536f487..448fde12e 100644
--- a/modules/mol/base/src/transfer_connectivity.cc
+++ b/modules/mol/base/src/transfer_connectivity.cc
@@ -73,6 +73,32 @@ public:
     return false;
   }
 
+  bool CheckInsertionCode(ResidueHandle chk_res, ResidueHandle src_res,
+                          ResidueHandle dst_res) {
+    // This is a hack to make GetDestAtomForSrcAtom work with mol::alg::PDBize,
+    // maybe with insertion codes in general...
+    // Depending on the value of min_polymer_size, PDBize puts small polymers in
+    // the ligand chain called '_'. The original chain is annotated by all
+    // residues having the same residue number but different insertion codes.
+    // That can lead to the same issue described further down for branched
+    // entities. Basically the problem is that in branched entities the residues
+    // do not need to be connected with their immediate neighbours breaking the
+    // original mechanics of GetDestAtomForSrcAtom.
+    // check if residue has an insertion code
+    if (chk_res.GetNumber().GetInsCode() != '\0') {
+      // check if the original residue also had no inscode
+      if (src_res.GetNumber().GetInsCode() == '\0') {
+        // There must be another residue with the same number and since
+        // this function is only used by PDBize, that residue originates
+        // from the same chain as dst_res and may be the correct partner.
+        if (chk_res.GetNumber().GetNum() != dst_res.GetNumber().GetNum()) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   AtomHandle GetDestAtomForSrcAtom(AtomHandle src_atom, ResidueHandle src_res,
                                    ResidueHandle dst_res,
                                    const std::map<String, AtomHandle>& name_to_atom) {
@@ -87,19 +113,61 @@ public:
     j = to_from_->find(dst_res.GetPrev());
     if (j != to_from_->end()) {
       if (j->second == r) {
-        return j->first.FindAtom(src_atom.GetName());
+        if (CheckInsertionCode(
+                              j->first.FindAtom(src_atom.GetName()).GetResidue(),
+                              r, dst_res)) {
+          return j->first.FindAtom(src_atom.GetName());
+        }
       }
     }
     j = to_from_->find(dst_res.GetNext());
     if (j != to_from_->end()) {
       if (j->second == r) {
-        return j->first.FindAtom(src_atom.GetName());
+        if (CheckInsertionCode(
+                              j->first.FindAtom(src_atom.GetName()).GetResidue(),
+                              r, dst_res)) {
+            return j->first.FindAtom(src_atom.GetName());
+          }
       }
     }
     // still nothing. scan linearly through all residues.
     for ( j = to_from_->begin(); j != to_from_->end(); ++j) {
       if (j->second == r) {
-        return j->first.FindAtom(src_atom.GetName());
+        // Check that we are connecting to the same chain, otherwise we need to
+        // check that the found residue also connects outwards.
+        if (j->first.GetChain() == dst_res.GetChain()) {
+          if (CheckInsertionCode(
+                              j->first.FindAtom(src_atom.GetName()).GetResidue(),
+                              r, dst_res)) {
+            return j->first.FindAtom(src_atom.GetName());
+          }
+        }
+        // Using the found residue would mean connecting to a different chain
+        // which is unusual. So we make sure that in the residue we transfer
+        // from, there is also a bond to a different chain. Otherwise the
+        // search continues.
+        // For branched mmCIF entities where connected residues may be not
+        // direct neighbours since branched entities are non-linear, that gave
+        // a problem when a bio unit doubled an oligosaccharide and the search
+        // found the bond of the copied oligosaccharide first. In that case the
+        // copied oligosaccharide is a different chain and the residue from the
+        // same chain comes later in the list.
+        BondHandleList jbonds = src_atom.GetBondList();
+        for (BondHandleList::iterator k = jbonds.begin(), e2 = jbonds.end();
+             k !=e2; ++k) {
+          // determine which "side" the src_res sits on
+          if (k->GetFirst() == src_atom) {
+            if (src_atom.GetResidue().GetChain()
+                != k->GetSecond().GetResidue().GetChain()) {
+              return j->first.FindAtom(src_atom.GetName());
+            }
+            continue;
+          }
+          if (src_atom.GetResidue().GetChain()
+              != k->GetFirst().GetResidue().GetChain()) {
+            return j->first.FindAtom(src_atom.GetName());
+          }
+        }
       }
     }
     return AtomHandle();
@@ -117,3 +185,5 @@ void TransferConnectivity(EntityHandle dest,
 }
 
 }}
+
+//  LocalWords:  PDBize ligand
diff --git a/modules/mol/base/tests/test_transfer_connectivity.cc b/modules/mol/base/tests/test_transfer_connectivity.cc
index 36ad6dc2c..e97be09fd 100644
--- a/modules/mol/base/tests/test_transfer_connectivity.cc
+++ b/modules/mol/base/tests/test_transfer_connectivity.cc
@@ -72,6 +72,150 @@ BOOST_AUTO_TEST_CASE(test_transfer_conn)
   BOOST_CHECK_EQUAL(12, ent2.GetBondCount());
 }
 
+BOOST_AUTO_TEST_CASE(test_transfer_two_from_one_chain)
+{
+  // Test that if, like by PDBize, a single chain is used to connect two chains,
+  // all the bonds stay in the right chain.
+  EntityHandle src = CreateEntity();
+  XCSEditor edi = src.EditXCS();
+  ChainHandle fst_chain = edi.InsertChain(String("A"));
+  ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA");
+  ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN");
+  ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN");
+
+  AtomHandle bma_o3;
+  AtomHandle bma_o6;
+  AtomHandle man_c1;
+
+  bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307));
+  bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405));
+
+  man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946));
+  edi.Connect(bma_o3, man_c1);
+
+  man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233));
+  edi.Connect(bma_o6, man_c1);
+
+  EntityHandle dst = CreateEntity();
+  edi = dst.EditXCS();
+  fst_chain = edi.InsertChain(String("B"));
+  bma1 = edi.AppendResidue(fst_chain, "BMA");
+  man1 = edi.AppendResidue(fst_chain, "MAN");
+  man2 = edi.AppendResidue(fst_chain, "MAN");
+
+  edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307));
+  edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405));
+
+  edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946));
+
+  edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233));
+
+  ChainHandle snd_chain = edi.InsertChain(String("C"));
+  bma1 = edi.AppendResidue(snd_chain, "BMA");
+  man1 = edi.AppendResidue(snd_chain, "MAN");
+  man2 = edi.AppendResidue(snd_chain, "MAN");
+
+  edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307));
+  edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405));
+
+  edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946));
+
+  edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233));
+
+  std::map<ResidueHandle, ResidueHandle> to_from;
+  ResidueHandleList r1 = src.GetResidueList();
+  ResidueHandleList r2 = fst_chain.GetResidueList();
+  for (size_t i = 0; i < r1.size(); ++i) {
+    to_from[r2[i]] = r1[i];
+  }
+  r2 = snd_chain.GetResidueList();
+  for (size_t i = 0; i < r1.size(); ++i) {
+    to_from[r2[i]] = r1[i];
+  }
+
+  TransferConnectivity(dst, to_from);
 
+  BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O3"),
+                         dst.FindAtom("B", 2, "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O6"),
+                         dst.FindAtom("B", 3, "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O3"),
+                         dst.FindAtom("C", 2, "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O6"),
+                         dst.FindAtom("C", 3, "C1")));
+  BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("C", 1, "O6"),
+                               dst.FindAtom("B", 3, "C1")), false);
+  BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("B", 1, "O6"),
+                               dst.FindAtom("C", 3, "C1")), false);
+}
+
+BOOST_AUTO_TEST_CASE(test_transfer_two_combined_one_chain)
+{
+  // Test that if, like by PDBize, a single chain ends up two times in '_', the
+  // connectivity does not get messy.
+  EntityHandle src = CreateEntity();
+  XCSEditor edi = src.EditXCS();
+  ChainHandle fst_chain = edi.InsertChain(String("A"));
+  ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA");
+  ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN");
+  ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN");
+
+  AtomHandle bma_o3;
+  AtomHandle bma_o6;
+  AtomHandle man_c1;
+
+  bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307));
+  bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405));
+
+  man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946));
+  edi.Connect(bma_o3, man_c1);
+
+  man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233));
+  edi.Connect(bma_o6, man_c1);
+
+  EntityHandle dst = CreateEntity();
+  edi = dst.EditXCS();
+  fst_chain = edi.InsertChain(String("_"));
+  bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(1, 'A'));
+  man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'B'));
+  man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'C'));
+
+  edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307));
+  edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405));
+  edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946));
+  edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233));
+
+  bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(2, 'A'));
+  man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'B'));
+  man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'C'));
+
+  edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307));
+  edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405));
+  edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946));
+  edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233));
+
+  std::map<ResidueHandle, ResidueHandle> to_from;
+  ResidueHandleList r1 = src.GetResidueList();
+  ResidueHandleList r2 = fst_chain.GetResidueList();
+  for (size_t i = 0; i < r1.size(); ++i) {
+    to_from[r2[i]] = r1[i];
+  }
+  for (size_t i = 0; i < r1.size(); ++i) {
+    to_from[r2[i+3]] = r1[i];
+  }
+
+  TransferConnectivity(dst, to_from);
+
+  BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O3"),
+                         dst.FindAtom("_", ResNum(1, 'B'), "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O6"),
+                         dst.FindAtom("_", ResNum(1, 'C'), "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O3"),
+                         dst.FindAtom("_", ResNum(2, 'B'), "C1")));
+  BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O6"),
+                         dst.FindAtom("_", ResNum(2, 'C'), "C1")));
+  BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("B", ResNum(2, 'A'), "O6"),
+                               dst.FindAtom("C", ResNum(1, 'C'), "C1")), false);
+}
 
 BOOST_AUTO_TEST_SUITE_END();
-- 
GitLab