From 9c4665d52f9b51d36d60f17502a2a4f575e9bd16 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Thu, 4 Jan 2024 13:24:50 +0100
Subject: [PATCH] mmcif writer: delegate chain typing to dedicated functions in
 ost mol

---
 modules/io/src/mol/mmcif_writer.cc | 81 +-----------------------------
 modules/mol/base/src/chain_type.cc | 54 ++++++++++++++++++++
 modules/mol/base/src/chain_type.hh | 49 ++++++++++++++++++
 3 files changed, 105 insertions(+), 79 deletions(-)

diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc
index be209e559..3cfd364d3 100644
--- a/modules/io/src/mol/mmcif_writer.cc
+++ b/modules/io/src/mol/mmcif_writer.cc
@@ -152,37 +152,6 @@ namespace {
     return "other";
   }
 
-  String GuessEntityPolyType(ost::mol::ChainType chain_type) {
-    // no real guessing but hardcoded response for every polymer chain type in
-    // ost::mol::ChainType
-
-    // allowed values according to mmcif_pdbx_v50.dic:
-    // - cyclic-pseudo-peptide 	
-    // - other 	
-    // - peptide nucleic acid 	
-    // - polydeoxyribonucleotide 	
-    // - polydeoxyribonucleotide/polyribonucleotide hybrid 	
-    // - polypeptide(D) 	
-    // - polypeptide(L) 	
-    // - polyribonucleotide
-
-    // added additional type: unknown
-    // must be handled by caller
-
-    switch(chain_type) {
-      case ost::mol::CHAINTYPE_POLY: return "other";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polypeptide(D)";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polypeptide(L)";
-      case ost::mol::CHAINTYPE_POLY_DN: return "polydeoxyribonucleotide";
-      case ost::mol::CHAINTYPE_POLY_RN: return "polyribonucleotide";
-      case ost::mol::CHAINTYPE_POLY_DN_RN: return "polydeoxyribonucleotide/polyribonucleotide hybrid";
-      case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "cyclic-pseudo-peptide";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "peptide nucleic acid";
-      case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "other";
-      default: return "unknown";
-    }
-  }
-
   String GuessEntityType(const ost::mol::ResidueHandleList& res_list) {
 
     // guesses _entity.type based on residue chem classes
@@ -219,41 +188,6 @@ namespace {
     return "polymer";
   }
 
-  String GuessEntityType(ost::mol::ChainType chain_type) {
-    // no real guessing but hardcoded response for every chain type in
-    // ost::mol::ChainType
-
-    // allowed values according to mmcif_pdbx_v50.dic:
-    // - branched
-    // - macrolide
-    // - non-polymer
-    // - polymer
-    // - water
-
-    // added additional type: unknown
-    // must be handled by caller
-
-    switch(chain_type) {
-      case ost::mol::CHAINTYPE_POLY: return "polymer";
-      case ost::mol::CHAINTYPE_NON_POLY: return "non-polymer";
-      case ost::mol::CHAINTYPE_WATER: return "water";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_DN: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_RN: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_SAC_D: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_SAC_L: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_DN_RN: return "polymer";
-      case ost::mol::CHAINTYPE_UNKNOWN: return "unknown";
-      case ost::mol::CHAINTYPE_MACROLIDE: return "macrolide";         
-      case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "polymer";
-      case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "polymer";
-      case ost::mol::CHAINTYPE_BRANCHED: return "branched";
-      case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "branched";
-      default: return "unknown";
-    }
-  }
-
   // internal object with all info to fill chem_comp_ category
   struct CompInfo {
     String type;
@@ -813,22 +747,11 @@ namespace {
                   std::vector<ost::io::MMCifWriterEntity>& entity_infos) {
     // use chain_type info attached to chain to determine
     // _entity.type and _entity_poly.type
-    String type = GuessEntityType(chain_type);
-    if(type == "unknown") {
-      std::stringstream ss;
-      ss << "Each chain must have valid chain type set, got " << chain_type;
-      throw ost::io::IOException(ss.str());
-    }
+    String type = ost::mol::EntityTypeFromChainType(chain_type);
     bool is_poly = type == "polymer";
     String poly_type = "";
     if(is_poly) {
-      poly_type = GuessEntityPolyType(chain_type);
-      if(poly_type == "unknown") {
-        std::stringstream ss;
-        ss << "Each polymer chain must have valid polymer chain type set, got ";
-        ss << chain_type;
-        throw ost::io::IOException(ss.str());
-      }
+      poly_type = ost::mol::EntityPolyTypeFromChainType(chain_type);
     }
     return SetupEntity(asym_chain_name, type, poly_type, res_list,
                        resnum_alignment, entity_infos);
diff --git a/modules/mol/base/src/chain_type.cc b/modules/mol/base/src/chain_type.cc
index 7604bbc50..39be8389c 100644
--- a/modules/mol/base/src/chain_type.cc
+++ b/modules/mol/base/src/chain_type.cc
@@ -121,4 +121,58 @@ String StringFromChainType(ChainType type)
   throw Error(ss.str());
 }
 
+String EntityTypeFromChainType(ChainType type) {
+  switch(type) { // translate to the mmCIF _entity.type vocabulary
+     case ost::mol::CHAINTYPE_POLY: return "polymer";
+     case ost::mol::CHAINTYPE_NON_POLY: return "non-polymer";
+     case ost::mol::CHAINTYPE_WATER: return "water";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_DN: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_RN: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_SAC_D: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_SAC_L: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_DN_RN: return "polymer";
+     case ost::mol::CHAINTYPE_MACROLIDE: return "macrolide";
+     case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "polymer";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "polymer";
+     case ost::mol::CHAINTYPE_BRANCHED: return "branched";
+     case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "branched";
+  }
+  // no case matched (e.g. CHAINTYPE_UNKNOWN): report the offending value
+  std::stringstream ss; // text passed to the ctor would be overwritten by <<
+  ss << "Unknown ChainType item found: '" << type << "'!";
+  throw Error(ss.str());
+}
+
+String EntityPolyTypeFromChainType(ChainType type) {
+  switch(type) { // translate to the mmCIF _entity_poly.type vocabulary
+     case ost::mol::CHAINTYPE_POLY: return "other";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_D: return "polypeptide(D)";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_L: return "polypeptide(L)";
+     case ost::mol::CHAINTYPE_POLY_DN: return "polydeoxyribonucleotide";
+     case ost::mol::CHAINTYPE_POLY_RN: return "polyribonucleotide";
+     case ost::mol::CHAINTYPE_POLY_SAC_D: return "other"; // older dictionaries have "polysaccharide(D)"
+     case ost::mol::CHAINTYPE_POLY_SAC_L: return "other"; // older dictionaries have "polysaccharide(L)"
+     case ost::mol::CHAINTYPE_POLY_DN_RN: return "polydeoxyribonucleotide/polyribonucleotide hybrid";
+     case ost::mol::CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE: return "cyclic-pseudo-peptide";
+     case ost::mol::CHAINTYPE_POLY_PEPTIDE_DN_RN: return "peptide nucleic acid";
+  }
+  // every chain type not listed above ends up here and is rejected
+  std::stringstream ss; // text passed to the ctor would be overwritten by <<
+  ss << "Cannot return entity poly type from chain of type: '" << type << "'!";
+  throw Error(ss.str());
+}
+
+String BranchedTypeFromChainType(ChainType type) {
+  switch(type) { // translate to the mmCIF pdbx_entity_branch.type vocabulary
+     case ost::mol::CHAINTYPE_BRANCHED: return "oligosaccharide"; // the only one
+     case ost::mol::CHAINTYPE_OLIGOSACCHARIDE: return "oligosaccharide";
+  }
+  // every chain type not listed above ends up here and is rejected
+  std::stringstream ss; // text passed to the ctor would be overwritten by <<
+  ss << "Cannot return branched type from chain of type: '" << type << "'!";
+  throw Error(ss.str());
+}
+
 }} //ns
diff --git a/modules/mol/base/src/chain_type.hh b/modules/mol/base/src/chain_type.hh
index 839844827..6420f0143 100644
--- a/modules/mol/base/src/chain_type.hh
+++ b/modules/mol/base/src/chain_type.hh
@@ -79,6 +79,55 @@ ChainType DLLEXPORT_OST_MOL ChainTypeFromString(const String& identifier);
 ///         unknown type
 String DLLEXPORT_OST_MOL StringFromChainType(ChainType type);
 
+/// \brief Return _entity.type consistent with respective mmCIF vocabulary
+///        (mmcif_pdbx_v50):
+///        - branched
+///        - macrolide
+///        - non-polymer
+///        - polymer
+///        - water
+///
+///        For consistency with older vocabularies, CHAINTYPE_POLY_SAC_D
+///        and CHAINTYPE_POLY_SAC_L return "polymer".
+///
+/// \param type ChainType to be translated
+///
+/// \return String corresponding to the input, throws an ost::Error on
+///         unknown type
+String DLLEXPORT_OST_MOL EntityTypeFromChainType(ChainType type);
+
+/// \brief Return _entity_poly.type consistent with mmCIF dictionary
+///        (mmcif_pdbx_v50):
+///        - cyclic-pseudo-peptide
+///        - other
+///        - peptide nucleic acid
+///        - polydeoxyribonucleotide
+///        - polydeoxyribonucleotide/polyribonucleotide hybrid
+///        - polypeptide(D)
+///        - polypeptide(L)
+///        - polyribonucleotide
+///
+///        For consistency with older dictionaries, CHAINTYPE_POLY_SAC_D
+///        and CHAINTYPE_POLY_SAC_L are still accepted but return "other".
+///        Older dictionaries still had "polysaccharide(D)" and
+///        "polysaccharide(L)".
+///
+/// \param type ChainType to be translated
+///
+/// \return String corresponding to the input, throws an ost::Error on
+///         unknown type or if it's not of _entity.type polymer
+String DLLEXPORT_OST_MOL EntityPolyTypeFromChainType(ChainType type);
+
+/// \brief Return pdbx_entity_branch.type consistent with mmCIF dictionary
+///        (mmcif_pdbx_v50):
+///        - oligosaccharide
+///
+/// \param type ChainType to be translated
+///
+/// \return String corresponding to the input, throws an ost::Error on
+///         unknown type or if it's not of _entity.type branched
+String DLLEXPORT_OST_MOL BranchedTypeFromChainType(ChainType type);
+
 }} //ns
 
 #endif
-- 
GitLab