From 289de1349d24f6f180a5c6598af0f354fd3587bf Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Mon, 5 Feb 2024 09:37:35 +0100 Subject: [PATCH] mmcif writer: avoid hardcoded monomer id for water chain DOD is water too! Its monomer id must be written in the chem_comp category, otherwise we end up with an invalid mmCIF file. Instead of hardcoding HOH, we now add whatever monomer ids we observe in the residues of the water chain. --- modules/io/src/mol/mmcif_writer.cc | 33 +++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc index e944bf9a2..4f00ef22a 100644 --- a/modules/io/src/mol/mmcif_writer.cc +++ b/modules/io/src/mol/mmcif_writer.cc @@ -503,6 +503,26 @@ namespace { for(size_t i = 0; i < entity_infos.size(); ++i) { if(entity_infos[i].type == "water" && type == "water") { AddAsym(asym_chain_name, entity_infos[i]); + // there could be the situation that in the chain we saw before, only + // HOH represented water. This chain might suddenly have DOD. + // We add this to the MMCifWriterEntity to correctly write the chem_comp + // category in the end. 
+ std::set<String> water_mon_ids(entity_infos[i].mon_ids.begin(), + entity_infos[i].mon_ids.end()); + for(auto res: res_list) { + water_mon_ids.insert(res.GetName()); + } + entity_infos[i].mon_ids = std::vector<String>(water_mon_ids.begin(), + water_mon_ids.end()); + // seq and seq_can are irrelevant for water, still keep it in sync + std::vector<String> seq; + std::vector<String> seq_can; + for(auto mon_id: entity_infos[i].mon_ids) { + seq.push_back(MonIDToOLC(mon_id)); + seq_can.push_back("?"); // It's irrelevant anyways + } + entity_infos[i].seq_olcs = seq; + entity_infos[i].seq_can_olcs = seq_can; return i; } if(entity_infos[i].type == type && @@ -571,9 +591,16 @@ namespace { } } else { if(type == "water") { - mon_ids.push_back("HOH"); - seq.push_back("(HOH)"); - seq_can.push_back("?"); + std::set<String> water_mon_ids; + for(auto res: res_list) { + water_mon_ids.insert(res.GetName()); + } + mon_ids = std::vector<String>(water_mon_ids.begin(), + water_mon_ids.end()); + for(auto mon_id: mon_ids) { + seq.push_back(MonIDToOLC(mon_id)); + seq_can.push_back("?"); // It's irrelevant anyways + } } else { for(auto res: res_list) { mon_ids.push_back(res.GetName()); -- GitLab