From be7342dfa455e0ffe4417e398cb50632cb196dd3 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Tue, 2 Jan 2024 16:38:11 +0100 Subject: [PATCH] mmcif writer: Heuristic for different _chem_comp.type for same compound name The MMCifWriter globally stores the types of all residues in a structure. We do a consistency check that the same residue name always has the same type. However, there were special cases that we observed in the PDB. One example is entry 2YHX which has plenty of UNK. UNK is 'L-PEPTIDE LINKING' according to the components dictionary. However, OST reads that info from the components dictionary AND checks whether there are additional unknown atoms. If yes, ChemClass is set to UNKNOWN which results in _chem_comp.type OTHER. In the case of 2YHX we now have a mix of "L-PEPTIDE LINKING" and "OTHER". The added heuristic allows such a mix of two types if one of the types is "OTHER", and then stores the one that is NOT "OTHER". In this case "L-PEPTIDE LINKING". --- modules/io/src/mol/mmcif_writer.cc | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc index bcefc0ea1..59d30f2e0 100644 --- a/modules/io/src/mol/mmcif_writer.cc +++ b/modules/io/src/mol/mmcif_writer.cc @@ -508,9 +508,25 @@ namespace { String type = ChemClassToChemCompType(res.GetChemClass()); auto it = comp_infos.find(res_name); if(it != comp_infos.end()) { - // check whether type is consistent if(it->second.type != type) { - throw ost::io::IOException("There can be only one"); + // If the already stored type or the incoming type are OTHER, + // we keep the one that is NOT OTHER => everything has preference over + // OTHER. However, if both types are NOT OTHER and they do not match, + // we throw an error. 
+ if(type == "OTHER") { + continue; + } else if (it->second.type == "OTHER") { + CompInfo info; + info.type = type; + comp_infos[res_name] = info; + } else { + std::stringstream ss; + ss << "Residue " << res << "has _chem_comp.type \"" << type; + ss << "\" which is derived from its chem class: " << res.GetChemClass(); + ss << ". Observed already another _chem_comp.type for a residue of "; + ss << "the same name: " << it->second.type; + throw ost::io::IOException(ss.str()); + } } } else { CompInfo info; @@ -1428,7 +1444,7 @@ void MMCifWriter::SetStructure(const ost::mol::EntityHandle& ent, // chains, ligands in separate chains etc. Chain types are inferred from // chain type property set to the chains in ent. ProcessEnt(ent, comp_infos, entity_info_, - atom_site_, pdbx_poly_seq_scheme_); + atom_site_, pdbx_poly_seq_scheme_); } else { // rule based splitting of chains into mmCIF conform chains ProcessEntmmCIFify(ent, comp_infos, entity_info_, -- GitLab