From 9c4665d52f9b51d36d60f17502a2a4f575e9bd16 Mon Sep 17 00:00:00 2001 From: Gabriel Studer <gabriel.studer@unibas.ch> Date: Thu, 4 Jan 2024 13:24:50 +0100 Subject: [PATCH] mmcif writer: delegate chain typing to dedicated functions in ost mol --- modules/io/src/mol/mmcif_writer.cc | 81 +----------------------------- modules/mol/base/src/chain_type.cc | 54 ++++++++++++++++++++ modules/mol/base/src/chain_type.hh | 49 ++++++++++++++++++ 3 files changed, 105 insertions(+), 79 deletions(-) diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc index be209e559..3cfd364d3 100644 --- a/modules/io/src/mol/mmcif_writer.cc +++ b/modules/io/src/mol/mmcif_writer.cc @@ -152,37 +152,6 @@ namespace { return "other"; } - String GuessEntityPolyType(ost::mol::ChainType chain_type) { - // no real guessing but hardcoded response for every polymer chain type in - // ost::mol::ChainType - - // allowed values according to mmcif_pdbx_v50.dic: - // - cyclic-pseudo-peptide - // - other - // - peptide nucleic acid - // - polydeoxyribonucleotide - // - polydeoxyribonucleotide/polyribonucleotide hybrid - // - polypeptide(D) - // - polypeptide(L) - // - polyribonucleotide - - // added additional type: unknown - // must be handled by caller - - switch(chain_type) { - case ost::mol::CHAINTYPE_POLY: return "other"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polypeptide(D)"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polypeptide(L)"; - case ost::mol::CHAINTYPE_POLY_DN: return "polydeoxyribonucleotide"; - case ost::mol::CHAINTYPE_POLY_RN: return "polyribonucleotide"; - case ost::mol::CHAINTYPE_POLY_DN_RN: return "polydeoxyribonucleotide/polyribonucleotide hybrid"; - case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "cyclic-pseudo-peptide"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "peptide nucleic acid"; - case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "other"; - default: return "unknown"; - } - } - String GuessEntityType(const 
ost::mol::ResidueHandleList& res_list) { // guesses _entity.type based on residue chem classes @@ -219,41 +188,6 @@ namespace { return "polymer"; } - String GuessEntityType(ost::mol::ChainType chain_type) { - // no real guessing but hardcoded response for every chain type in - // ost::mol::ChainType - - // allowed values according to mmcif_pdbx_v50.dic: - // - branched - // - macrolide - // - non-polymer - // - polymer - // - water - - // added additional type: unknown - // must be handled by caller - - switch(chain_type) { - case ost::mol::CHAINTYPE_POLY: return "polymer"; - case ost::mol::CHAINTYPE_NON_POLY: return "non-polymer"; - case ost::mol::CHAINTYPE_WATER: return "water"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polymer"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polymer"; - case ost::mol::CHAINTYPE_POLY_DN: return "polymer"; - case ost::mol::CHAINTYPE_POLY_RN: return "polymer"; - case ost::mol::CHAINTYPE_POLY_SAC_D: return "polymer"; - case ost::mol::CHAINTYPE_POLY_SAC_L: return "polymer"; - case ost::mol::CHAINTYPE_POLY_DN_RN: return "polymer"; - case ost::mol::CHAINTYPE_UNKNOWN: return "unknown"; - case ost::mol::CHAINTYPE_MACROLIDE: return "macrolide"; - case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "polymer"; - case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "polymer"; - case ost::mol::CHAINTYPE_BRANCHED: return "branched"; - case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "branched"; - default: return "unknown"; - } - } - // internal object with all info to fill chem_comp_ category struct CompInfo { String type; @@ -813,22 +747,11 @@ namespace { std::vector<ost::io::MMCifWriterEntity>& entity_infos) { // use chain_type info attached to chain to determine // _entity.type and _entity_poly.type - String type = GuessEntityType(chain_type); - if(type == "unknown") { - std::stringstream ss; - ss << "Each chain must have valid chain type set, got " << chain_type; - throw ost::io::IOException(ss.str()); - } + String type = 
ost::mol::EntityTypeFromChainType(chain_type);
     bool is_poly = type == "polymer";
     String poly_type = "";
     if(is_poly) {
-      poly_type = GuessEntityPolyType(chain_type);
-      if(poly_type == "unknown") {
-        std::stringstream ss;
-        ss << "Each polymer chain must have valid polymer chain type set, got ";
-        ss << chain_type;
-        throw ost::io::IOException(ss.str());
-      }
+      poly_type = ost::mol::EntityPolyTypeFromChainType(chain_type);
     }
     return SetupEntity(asym_chain_name, type, poly_type, res_list,
                        resnum_alignment, entity_infos);
diff --git a/modules/mol/base/src/chain_type.cc b/modules/mol/base/src/chain_type.cc
index 7604bbc50..39be8389c 100644
--- a/modules/mol/base/src/chain_type.cc
+++ b/modules/mol/base/src/chain_type.cc
@@ -121,4 +121,58 @@ String StringFromChainType(ChainType type)
   throw Error(ss.str());
 }
 
+String EntityTypeFromChainType(ChainType type) {
+  switch(type) { // no default: unlisted values fall through to the throw
+    case ost::mol::CHAINTYPE_POLY: return "polymer";
+    case ost::mol::CHAINTYPE_NON_POLY: return "non-polymer";
+    case ost::mol::CHAINTYPE_WATER: return "water";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_DN: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_RN: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_SAC_D: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_SAC_L: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_DN_RN: return "polymer";
+    case ost::mol::CHAINTYPE_MACROLIDE: return "macrolide";
+    case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "polymer";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "polymer";
+    case ost::mol::CHAINTYPE_BRANCHED: return "branched";
+    case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "branched";
+  }
+  // CHAINTYPE_UNKNOWN and any other unlisted value have no _entity.type
+  std::stringstream ss; // stream the prefix: a ctor string gets overwritten by <<
+  ss << "Unknown ChainType item found: '" << type << "'!";
+  throw Error(ss.str());
+}
+
+String EntityPolyTypeFromChainType(ChainType type) {
+  switch(type) { // only chain types mapped to _entity.type "polymer" are listed
+    case ost::mol::CHAINTYPE_POLY: return "other";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polypeptide(D)";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polypeptide(L)";
+    case ost::mol::CHAINTYPE_POLY_DN: return "polydeoxyribonucleotide";
+    case ost::mol::CHAINTYPE_POLY_RN: return "polyribonucleotide";
+    case ost::mol::CHAINTYPE_POLY_SAC_D: return "other"; // older dictionaries have "polysaccharide(D)"
+    case ost::mol::CHAINTYPE_POLY_SAC_L: return "other"; // older dictionaries have "polysaccharide(L)"
+    case ost::mol::CHAINTYPE_POLY_DN_RN: return "polydeoxyribonucleotide/polyribonucleotide hybrid";
+    case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "cyclic-pseudo-peptide";
+    case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "peptide nucleic acid";
+  }
+  // every chain type not listed above has no _entity_poly.type
+  std::stringstream ss; // stream the prefix: a ctor string gets overwritten by <<
+  ss << "Cannot return entity poly type from chain of type: '" << type << "'!";
+  throw Error(ss.str());
+}
+
+String BranchedTypeFromChainType(ChainType type) {
+  switch(type) {
+    case ost::mol::CHAINTYPE_BRANCHED: return "oligosaccharide"; // the only one
+    case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "oligosaccharide";
+  }
+  // neither CHAINTYPE_BRANCHED nor CHAINTYPE_OLIGOSACCHARIDE
+  std::stringstream ss; // stream the prefix: a ctor string gets overwritten by <<
+  ss << "Cannot return branched type from chain of type: '" << type << "'!";
+  throw Error(ss.str());
+}
+
 }} //ns
diff --git a/modules/mol/base/src/chain_type.hh b/modules/mol/base/src/chain_type.hh
index 839844827..6420f0143 100644
--- a/modules/mol/base/src/chain_type.hh
+++ b/modules/mol/base/src/chain_type.hh
@@ -79,6 +79,55 @@ ChainType DLLEXPORT_OST_MOL ChainTypeFromString(const String& identifier);
 /// unknown type
 String DLLEXPORT_OST_MOL StringFromChainType(ChainType type);
 
+/// \brief Return _entity.type consistent with respective mmCIF vocabulary
+///        (mmcif_pdbx_v50):
+///        - branched
+///        - macrolide
+///        - non-polymer
+///        - polymer
+///        - water
+///
+///        For consistency with older vocabularies, CHAINTYPE_POLY_SAC_D
+///        and CHAINTYPE_POLY_SAC_L return "polymer"
+///
+/// \param type ChainType to be translated
+///
+/// \return String corresponding to the input,
throws an ost::Error on +/// unknown type +String DLLEXPORT_OST_MOL EntityTypeFromChainType(ChainType type); + +/// \brief Return _entity_poly.type consistent with mmCIF dictionary +/// (mmcif_pdbx_v50): +/// - cyclic-pseudo-peptide +/// - other +/// - peptide nucleic acid +/// - polydeoxyribonucleotide +/// - polydeoxyribonucleotide/polyribonucleotide hybrid +/// - polypeptide(D) +/// - polypeptide(L) +/// - polyribonucleotide +/// +/// For consistency with older dictionaries, CHAINTYPE_POLY_SAC_D +/// and CHAINTYPE_POLY_SAC_L are still accepted but return "other". +/// Older dictionaries still had "polysaccharide(D)" and +/// "polysaccharide(L)". +/// +/// \param type ChainType to be translated +/// +/// \return String corresponding to the input, throws an ost::Error on +/// unknown type or if it's not of _entity.type polymer +String DLLEXPORT_OST_MOL EntityPolyTypeFromChainType(ChainType type); + +/// \brief Return _pdbx_entity_branch.type consistent with mmCIF dictionary +/// (mmcif_pdbx_v50): +/// - oligosaccharide +/// +/// \param type ChainType to be translated +/// +/// \return String corresponding to the input, throws an ost::Error on +/// unknown type or if it's not of _entity.type branched +String DLLEXPORT_OST_MOL BranchedTypeFromChainType(ChainType type); + }} //ns #endif -- GitLab