From 529ccde9e92097f226e4aa3e40ac4113ff7b24ee Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 2 Jan 2024 13:01:21 +0100 Subject: [PATCH] biounits: naming conflicts of AU chains with identity transform applied Chain naming in biounit construction reserves 1.<au_cname> for the respective AU chain with identity transform applied. As of January 2024, there are 3 PDB entries (8qn6, 8x1h, 2c0x) where the same AU chain with identity transform occurs several times in the same biounit. This is likely an error in the respective mmCIF files as the resulting chains sit on top of each other. OST just names the FIRST occurrence as 1.<au_cname>. A warning for that behaviour is also added to the documentation. --- modules/mol/alg/doc/molalg.rst | 10 ++++++++++ modules/mol/alg/src/biounit.cc | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/modules/mol/alg/doc/molalg.rst b/modules/mol/alg/doc/molalg.rst index 366b1cb83..6cb57ea40 100644 --- a/modules/mol/alg/doc/molalg.rst +++ b/modules/mol/alg/doc/molalg.rst @@ -1474,6 +1474,16 @@ from a :class:`ost.io.MMCifInfoBioUnit` or the derived is reserved for the original AU chain with identity transform (read: no transform) applied. If a certain AU chain only occurs with an actual transform applied, numbering starts at 2. + + .. warning:: + There is the (rare) possibility that an AU chain that has only identity + transform applied is not named 1.<au_cname>. + As of January 2024, there are 3 PDB entries (8qn6, 8x1h, 2c0x) where + the same AU chain with identity transform occurs several times in the same + biounit. This is likely an error in the respective mmCIF files as the + resulting chains sit on top of each other. OST just names the FIRST + occurrence as 1.<au_cname>. 
+ :param asu: The assymetric unit :type asu: :class:`ost.mol.EntityHandle` diff --git a/modules/mol/alg/src/biounit.cc b/modules/mol/alg/src/biounit.cc index 74efb5299..e5fd1644f 100644 --- a/modules/mol/alg/src/biounit.cc +++ b/modules/mol/alg/src/biounit.cc @@ -265,7 +265,6 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu, // process all transformations for(uint t_idx = 0; t_idx < transforms[chain_intvl].size(); ++t_idx) { const geom::Mat4& m = transforms[chain_intvl][t_idx]; - // check if m is identity matrix => no transformation applied bool is_identity = true; geom::Mat4 identity_matrix = geom::Mat4::Identity(); @@ -285,16 +284,18 @@ ost::mol::EntityHandle CreateBU(const ost::mol::EntityHandle& asu, String au_cname = au_chains[chain_intvl][c_idx]; std::stringstream bu_cname_ss; - if(is_identity) { - if(au_chain_copies.find(au_cname) != au_chain_copies.end()) { - std::stringstream err; - err<<"Try to insert copy of AU chain "<<au_cname<<" with identity "; - err<<"transform, i.e. copy the raw coordinates. This has already "; - err<<"been done for this AU chain and there can only be one."; - throw ost::Error(err.str()); - } + if(is_identity && au_chain_copies.find(au_cname) == au_chain_copies.end()) { bu_cname_ss << "1." << au_cname; // 1.<au_cname> reserved for AU chain // without transformation + // at least the first of it... + // as of January 2024, there were 3 + // entries (8qn6, 8x1h, 2c0x) where + // the identity transform is applied + // more than once on the same AU + // chain, effectively leading to + // chains sitting on top of each + // other... But hey, bullshit in, + // bullshit out au_chain_copies.insert(au_cname); } else { if(chain_counter.find(au_cname) == chain_counter.end()) { -- GitLab