From d72c2972d102bb50e0b8ebca5d6a707790418a68 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Fri, 5 Jan 2024 16:23:32 +0100
Subject: [PATCH] mmcif writer: enable _pdbx_entity_branch category

---
 modules/io/src/mol/mmcif_writer.cc | 58 +++++++++++++++++++++++++++---
 modules/io/src/mol/mmcif_writer.hh |  4 +++
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc
index 2c5490ea4..4af329c28 100644
--- a/modules/io/src/mol/mmcif_writer.cc
+++ b/modules/io/src/mol/mmcif_writer.cc
@@ -154,6 +154,16 @@ namespace {
     return "other";
   }
 
+  // template to allow ost::mol::ResidueHandleList and ost::mol::ResidueViewList
+  template<class T>
+  String GuessEntityBranchType(const T& res_list) {
+    // guesses _pdbx_entity_branch.type based on residue chem classes
+    //
+    // Thats a hard one... only allowed value according to mmcif_pdbx_v50.dic:
+    // oligosaccharide
+    return "oligosaccharide";
+  }
+
   // template to allow ost::mol::ResidueHandleList and ost::mol::ResidueViewList
   template<class T>
   String GuessEntityType(const T& res_list) {
@@ -612,13 +622,14 @@ namespace {
   int SetupEntity(const String& asym_chain_name,
                   const String& type,
                   const String& poly_type,
+                  const String& branch_type,
                   const T& res_list,
                   bool resnum_alignment,
                   std::vector<ost::io::MMCifWriterEntity>& entity_infos) {
 
     bool is_poly = type == "polymer";
 
-    if(!is_poly && res_list.size() != 1 && type != "water") {
+    if(!is_poly && res_list.size() != 1 && type != "water" && type != "branched") {
       std::stringstream ss;
       ss << "Cannot setup entity with " << res_list.size() << " residues ";
       ss << "but is of type: " << type;
@@ -632,7 +643,8 @@ namespace {
         return i;
       }
       if(entity_infos[i].type == type &&
-         entity_infos[i].poly_type == poly_type) {
+         entity_infos[i].poly_type == poly_type &&
+         entity_infos[i].branch_type == branch_type) {
         if(is_poly && resnum_alignment) {
           if(MatchEntityResnum(res_list, entity_infos[i])) {
             AddAsymResnum(asym_chain_name, res_list, entity_infos[i]);
@@ -718,6 +730,7 @@ namespace {
     entity_infos.push_back(ost::io::MMCifWriterEntity());
     entity_infos.back().type = type;
     entity_infos.back().poly_type = poly_type;
+    entity_infos.back().branch_type = branch_type;
     entity_infos.back().mon_ids = mon_ids;
     entity_infos.back().seq_olcs = seq;
     entity_infos.back().seq_can_olcs = seq_can;
@@ -746,7 +759,12 @@ namespace {
     if(is_poly) {
       poly_type = GuessEntityPolyType(res_list);
     }
-    return SetupEntity(asym_chain_name, type, poly_type, res_list,
+    bool is_branched = type == "branched";
+    String branch_type = "";
+    if(is_branched) {
+      branch_type = GuessEntityBranchType(res_list);
+    }
+    return SetupEntity(asym_chain_name, type, poly_type, branch_type, res_list,
                        resnum_alignment, entity_infos);
   }
 
@@ -765,7 +783,12 @@ namespace {
     if(is_poly) {
       poly_type = ost::mol::EntityPolyTypeFromChainType(chain_type);
     }
-    return SetupEntity(asym_chain_name, type, poly_type, res_list,
+    bool is_branched = type == "branched";
+    String branch_type = "";
+    if(is_branched) {
+      branch_type = ost::mol::BranchedTypeFromChainType(chain_type);
+    }
+    return SetupEntity(asym_chain_name, type, poly_type, branch_type, res_list,
                        resnum_alignment, entity_infos);
   }
 
@@ -855,6 +878,14 @@ namespace {
     return sl;    
   }
 
+  ost::io::StarWriterLoopPtr Setup_pdbx_entity_branch_ptr() {
+    ost::io::StarWriterLoopDesc desc("_pdbx_entity_branch");
+    desc.Add("entity_id");
+    desc.Add("type");
+    ost::io::StarWriterLoopPtr sl(new ost::io::StarWriterLoop(desc));
+    return sl;    
+  }
+
   void Feed_atom_type(ost::io::StarWriterLoopPtr atom_type_ptr,
                       ost::io::StarWriterLoopPtr atom_site_ptr) {
     // we're just extracting every type_symbol that we observed
@@ -1112,6 +1143,20 @@ namespace {
     }
   }
 
+  void Feed_pdbx_entity_branch(ost::io::StarWriterLoopPtr pdbx_entity_branch_ptr,
+                               const std::vector<ost::io::MMCifWriterEntity>& entity_infos) {
+    std::vector<ost::io::StarWriterValue> branch_data;
+    branch_data.push_back(ost::io::StarWriterValue::FromInt(0));
+    branch_data.push_back(ost::io::StarWriterValue::FromString("oligosaccharide"));
+    for(size_t i = 0; i < entity_infos.size(); ++i) {
+      if(entity_infos[i].type == "branched") {
+        branch_data[0] = ost::io::StarWriterValue::FromInt(i);
+        branch_data[1] = ost::io::StarWriterValue::FromString(entity_infos[i].branch_type);
+        pdbx_entity_branch_ptr->AddData(branch_data);
+      }
+    }
+  }
+
   // template to allow ost::mol::ResidueHandleList and ost::mol::ResidueViewList
   template<class T>
   void ProcessEntmmCIFify(const std::vector<T>& res_lists,
@@ -1418,6 +1463,7 @@ void MMCifWriter::Setup() {
   entity_poly_ = Setup_entity_poly_ptr();
   entity_poly_seq_ = Setup_entity_poly_seq_ptr();
   chem_comp_ = Setup_chem_comp_ptr();
+  pdbx_entity_branch_ = Setup_pdbx_entity_branch_ptr();
 }
 
 void MMCifWriter::Finalize() {
@@ -1433,7 +1479,8 @@ void MMCifWriter::Finalize() {
   Feed_entity_poly(entity_poly_, entity_info_);
   Feed_entity_poly_seq(entity_poly_seq_, entity_info_);
   Feed_chem_comp(chem_comp_, comp_info_);
-  Feed_atom_type(atom_type_, atom_site_); 
+  Feed_atom_type(atom_type_, atom_site_);
+  Feed_pdbx_entity_branch(pdbx_entity_branch_, entity_info_);
 
   // finalize
   this->Push(chem_comp_);
@@ -1443,6 +1490,7 @@ void MMCifWriter::Finalize() {
   this->Push(entity_poly_seq_);
   this->Push(pdbx_poly_seq_scheme_);
   this->Push(atom_type_);
+  this->Push(pdbx_entity_branch_);
   this->Push(atom_site_);
 
   structure_set_ = true;
diff --git a/modules/io/src/mol/mmcif_writer.hh b/modules/io/src/mol/mmcif_writer.hh
index c176ac74f..e0ce06eaa 100644
--- a/modules/io/src/mol/mmcif_writer.hh
+++ b/modules/io/src/mol/mmcif_writer.hh
@@ -40,6 +40,9 @@ struct MMCifWriterEntity {
   // _entity_poly.type
   String poly_type;  
 
+  // __pdbx_entity_branch.type
+  String branch_type;
+
   // Names of chains in AU that are assigned to this entity
   std::vector<String> asym_ids;
 
@@ -98,6 +101,7 @@ private:
   StarWriterLoopPtr entity_poly_;
   StarWriterLoopPtr entity_poly_seq_;
   StarWriterLoopPtr chem_comp_;
+  StarWriterLoopPtr pdbx_entity_branch_;
   bool structure_set_;
 };
 
-- 
GitLab