From 3544f890c21982b7dfa60f047fc1f8431ff09272 Mon Sep 17 00:00:00 2001 From: B13nch3n <stefan.bienert@me.com> Date: Fri, 31 Jul 2020 06:46:39 +0200 Subject: [PATCH] Fixed PDBize to restore bonds in branched entities correctly. --- modules/mol/base/src/transfer_connectivity.cc | 76 ++++++++- .../base/tests/test_transfer_connectivity.cc | 144 ++++++++++++++++++ 2 files changed, 217 insertions(+), 3 deletions(-) diff --git a/modules/mol/base/src/transfer_connectivity.cc b/modules/mol/base/src/transfer_connectivity.cc index b7536f487..448fde12e 100644 --- a/modules/mol/base/src/transfer_connectivity.cc +++ b/modules/mol/base/src/transfer_connectivity.cc @@ -73,6 +73,32 @@ public: return false; } + bool CheckInsertionCode(ResidueHandle chk_res, ResidueHandle src_res, + ResidueHandle dst_res) { + // This is a hack to make GetDestAtomForSrcAtom work with mol::alg::PDBize, + // maybe with insertion codes in general... + // Depending on the value of min_polymer_size, PDBize puts small polymers in + // the ligand chain called '_'. The original chain is annotated by all + // residues having the same residue number but different insertion codes. + // That can lead to the same issue described further down for branched + // entities. Basically the problem is that in branched entities the residues + // do not need to be connected with their immediate neighbours breaking the + // original mechanics of GetDestAtomForSrcAtom. + // check if residue has an insertion code + if (chk_res.GetNumber().GetInsCode() != '\0') { + // check if the original residue also had no inscode + if (src_res.GetNumber().GetInsCode() == '\0') { + // There must be another residue with the same number and since + // this function is only used by PDBize, that residue originates + // from the same chain as dst_res and may be the correct partner. + if (chk_res.GetNumber().GetNum() != dst_res.GetNumber().GetNum()) { + return false; + } + } + } + return true; + } + AtomHandle GetDestAtomForSrcAtom(AtomHandle src_atom, ResidueHandle src_res, ResidueHandle dst_res, const std::map<String, AtomHandle>& name_to_atom) { @@ -87,19 +113,61 @@ public: j = to_from_->find(dst_res.GetPrev()); if (j != to_from_->end()) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } } } j = to_from_->find(dst_res.GetNext()); if (j != to_from_->end()) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } } } // still nothing. scan linearly through all residues. for ( j = to_from_->begin(); j != to_from_->end(); ++j) { if (j->second == r) { - return j->first.FindAtom(src_atom.GetName()); + // Check that we are connecting to the same chain, otherwise we need to + // check that the found residue also connects outwards. + if (j->first.GetChain() == dst_res.GetChain()) { + if (CheckInsertionCode( + j->first.FindAtom(src_atom.GetName()).GetResidue(), + r, dst_res)) { + return j->first.FindAtom(src_atom.GetName()); + } + } + // Using the found residue would mean connecting to a different chain + // which is unusual. So we make sure that in the residue we transfer + // from, there is also a bond to a different chain. Otherwise the + // search continues. + // For branched mmCIF entities where connected residues may be not + // direct neighbours since branched entities are non-linear, that gave + // a problem when a bio unit doubled an oligosaccharide and the search + // found the bond of the copied oligosaccharide first. In that case the + // copied oligosaccharide is a different chain and the residue from the + // same chain comes later in the list. + BondHandleList jbonds = src_atom.GetBondList(); + for (BondHandleList::iterator k = jbonds.begin(), e2 = jbonds.end(); + k !=e2; ++k) { + // determine which "side" the src_res sits on + if (k->GetFirst() == src_atom) { + if (src_atom.GetResidue().GetChain() + != k->GetSecond().GetResidue().GetChain()) { + return j->first.FindAtom(src_atom.GetName()); + } + continue; + } + if (src_atom.GetResidue().GetChain() + != k->GetFirst().GetResidue().GetChain()) { + return j->first.FindAtom(src_atom.GetName()); + } + } } } return AtomHandle(); @@ -117,3 +185,5 @@ void TransferConnectivity(EntityHandle dest, } }} + +// LocalWords: PDBize ligand diff --git a/modules/mol/base/tests/test_transfer_connectivity.cc b/modules/mol/base/tests/test_transfer_connectivity.cc index 36ad6dc2c..e97be09fd 100644 --- a/modules/mol/base/tests/test_transfer_connectivity.cc +++ b/modules/mol/base/tests/test_transfer_connectivity.cc @@ -72,6 +72,150 @@ BOOST_AUTO_TEST_CASE(test_transfer_conn) BOOST_CHECK_EQUAL(12, ent2.GetBondCount()); } +BOOST_AUTO_TEST_CASE(test_transfer_two_from_one_chain) +{ + // Test that if, like by PDBize, a single chain is used to connect two chains, + // all the bonds stay in the right chain. + EntityHandle src = CreateEntity(); + XCSEditor edi = src.EditXCS(); + ChainHandle fst_chain = edi.InsertChain(String("A")); + ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA"); + ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN"); + ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN"); + + AtomHandle bma_o3; + AtomHandle bma_o6; + AtomHandle man_c1; + + bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.Connect(bma_o3, man_c1); + + man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + edi.Connect(bma_o6, man_c1); + + EntityHandle dst = CreateEntity(); + edi = dst.EditXCS(); + fst_chain = edi.InsertChain(String("B")); + bma1 = edi.AppendResidue(fst_chain, "BMA"); + man1 = edi.AppendResidue(fst_chain, "MAN"); + man2 = edi.AppendResidue(fst_chain, "MAN"); + + edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + + edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + + ChainHandle snd_chain = edi.InsertChain(String("C")); + bma1 = edi.AppendResidue(snd_chain, "BMA"); + man1 = edi.AppendResidue(snd_chain, "MAN"); + man2 = edi.AppendResidue(snd_chain, "MAN"); + + edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405)); + + edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946)); + + edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233)); + + std::map<ResidueHandle, ResidueHandle> to_from; + ResidueHandleList r1 = src.GetResidueList(); + ResidueHandleList r2 = fst_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + r2 = snd_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + + TransferConnectivity(dst, to_from); + BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O3"), + dst.FindAtom("B", 2, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("B", 1, "O6"), + dst.FindAtom("B", 3, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O3"), + dst.FindAtom("C", 2, "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("C", 1, "O6"), + dst.FindAtom("C", 3, "C1"))); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("C", 1, "O6"), + dst.FindAtom("B", 3, "C1")), false); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("B", 1, "O6"), + dst.FindAtom("C", 3, "C1")), false); +} + +BOOST_AUTO_TEST_CASE(test_transfer_two_combined_one_chain) +{ + // Test that if, like by PDBize, a single chain ends up two times in '_', the + // connectivity does not get messy. + EntityHandle src = CreateEntity(); + XCSEditor edi = src.EditXCS(); + ChainHandle fst_chain = edi.InsertChain(String("A")); + ResidueHandle bma1 = edi.AppendResidue(fst_chain, "BMA"); + ResidueHandle man1 = edi.AppendResidue(fst_chain, "MAN"); + ResidueHandle man2 = edi.AppendResidue(fst_chain, "MAN"); + + AtomHandle bma_o3; + AtomHandle bma_o6; + AtomHandle man_c1; + + bma_o3 = edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + bma_o6 = edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + + man_c1 = edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.Connect(bma_o3, man_c1); + + man_c1 = edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + edi.Connect(bma_o6, man_c1); + + EntityHandle dst = CreateEntity(); + edi = dst.EditXCS(); + fst_chain = edi.InsertChain(String("_")); + bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(1, 'A')); + man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'B')); + man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(1, 'C')); + + edi.InsertAtom(bma1, "O3", geom::Vec3(9.646, -29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(8.642, -28.695, 27.405)); + edi.InsertAtom(man1, "C1", geom::Vec3(10.253, -30.563, 33.946)); + edi.InsertAtom(man2, "C1", geom::Vec3(8.091, -29.369, 26.233)); + + bma1 = edi.AppendResidue(fst_chain, "BMA", ResNum(2, 'A')); + man1 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'B')); + man2 = edi.AppendResidue(fst_chain, "MAN", ResNum(2, 'C')); + + edi.InsertAtom(bma1, "O3", geom::Vec3(-9.646, 29.415, 33.307)); + edi.InsertAtom(bma1, "O6", geom::Vec3(-8.642, 28.695, 27.405)); + edi.InsertAtom(man1, "C1", geom::Vec3(-10.253, 30.563, 33.946)); + edi.InsertAtom(man2, "C1", geom::Vec3(-8.091, 29.369, 26.233)); + + std::map<ResidueHandle, ResidueHandle> to_from; + ResidueHandleList r1 = src.GetResidueList(); + ResidueHandleList r2 = fst_chain.GetResidueList(); + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i]] = r1[i]; + } + for (size_t i = 0; i < r1.size(); ++i) { + to_from[r2[i+3]] = r1[i]; + } + + TransferConnectivity(dst, to_from); + + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O3"), + dst.FindAtom("_", ResNum(1, 'B'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(1, 'A'), "O6"), + dst.FindAtom("_", ResNum(1, 'C'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O3"), + dst.FindAtom("_", ResNum(2, 'B'), "C1"))); + BOOST_CHECK(BondExists(dst.FindAtom("_", ResNum(2, 'A'), "O6"), + dst.FindAtom("_", ResNum(2, 'C'), "C1"))); + BOOST_CHECK_EQUAL(BondExists(dst.FindAtom("B", ResNum(2, 'A'), "O6"), + dst.FindAtom("C", ResNum(1, 'C'), "C1")), false); +} BOOST_AUTO_TEST_SUITE_END(); -- GitLab