From 89e5e6e773482eff2dffff6b92947ca075040024 Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Wed, 10 Jan 2024 13:00:36 +0100
Subject: [PATCH] mmcif writer: Make MMCifWriterEntity available from Python

Right now there is only a static constructor for polymer entities
---
 modules/io/pymod/export_mmcif_io.cc | 12 ++++++++
 modules/io/src/mol/mmcif_writer.cc  | 48 +++++++++++++++++++++++++++++
 modules/io/src/mol/mmcif_writer.hh  |  7 +++++
 3 files changed, 67 insertions(+)

diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc
index 3665c351b..f9a575b5f 100644
--- a/modules/io/pymod/export_mmcif_io.cc
+++ b/modules/io/pymod/export_mmcif_io.cc
@@ -147,6 +147,18 @@ void export_mmcif_io()
     .def("Write", &WrapStarWriterWrite, (arg("data_name"), arg("filename")))
   ;
 
+  // no Python constructor (no_init); the properties below only get a getter
+  class_<MMCifWriterEntity> entity_cls("MMCifWriterEntity", no_init);
+  entity_cls.def("FromPolymer", &MMCifWriterEntity::FromPolymer).staticmethod("FromPolymer");
+  entity_cls.add_property("type", &MMCifWriterEntity::type);
+  entity_cls.add_property("poly_type", &MMCifWriterEntity::poly_type);
+  entity_cls.add_property("branch_type", &MMCifWriterEntity::branch_type);
+  entity_cls.add_property("is_poly", &MMCifWriterEntity::is_poly);
+  entity_cls.add_property("mon_ids", &MMCifWriterEntity::mon_ids);
+  entity_cls.add_property("seq_olcs", &MMCifWriterEntity::seq_olcs);
+  entity_cls.add_property("seq_can_olcs", &MMCifWriterEntity::seq_can_olcs);
+  entity_cls.add_property("asym_ids", &MMCifWriterEntity::asym_ids);
+
   class_<MMCifWriter, bases<StarWriter> >("MMCifWriter", init<>())
     .def("SetStructure", &WrapSetStructureHandle, (arg("ent"), arg("mmcif_conform")=true))
     .def("SetStructure", &WrapSetStructureView, (arg("ent"), arg("mmcif_conform")=true))
diff --git a/modules/io/src/mol/mmcif_writer.cc b/modules/io/src/mol/mmcif_writer.cc
index 18831c046..16cf083f6 100644
--- a/modules/io/src/mol/mmcif_writer.cc
+++ b/modules/io/src/mol/mmcif_writer.cc
@@ -17,6 +17,8 @@
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 //------------------------------------------------------------------------------
 
+#include <unordered_set>
+
 #include <ost/mol/chem_class.hh>
 #include <ost/io/mol/mmcif_writer.hh>
 
@@ -68,6 +70,26 @@ namespace {
     std::vector<int> indices;
   };
 
+  // Throws ost::io::IOException unless entity_poly_type is one of the
+  // entity_poly.type values listed in the set below.
+  void CheckValidEntityPolyType(const String& entity_poly_type) {
+    // built once instead of on every call; contents never change
+    static const std::unordered_set<std::string> s = {
+      "other", "polydeoxyribonucleotide", "polypeptide(D)", "polypeptide(L)",
+      "polyribonucleotide", "polysaccharide(D)", "polysaccharide(L)"};
+    if(s.find(entity_poly_type) == s.end()) {
+      std::stringstream ss;
+      ss << "Observed value is no valid entity_poly.type: \"";
+      ss << entity_poly_type << "\". Allowed values: ";
+      for(const auto& type: s) {  // by reference: no per-item string copy
+        ss << type << ", ";
+      }
+      String err = ss.str();
+      // strip the trailing ", " left by the loop
+      throw ost::io::IOException(err.substr(0, err.size() - 2));
+    }
+  }
+
   // template to allow ost::mol::ResidueHandleList and ost::mol::ResidueViewList
   template<class T>
   String GuessEntityPolyType(const T& res_list) {
@@ -1395,6 +1417,32 @@ namespace {
 
 namespace ost { namespace io {
 
+MMCifWriterEntity MMCifWriterEntity::FromPolymer(const String& entity_poly_type,
+                                                 const std::vector<String>& mon_ids,
+                                                 conop::CompoundLibPtr compound_lib) {
+  // Both checks throw ost::io::IOException before any entity data is filled.
+  CheckValidEntityPolyType(entity_poly_type);
+  if(!compound_lib) throw ost::io::IOException("Compound library required");
+  MMCifWriterEntity ent;
+  ent.type = "polymer";
+  ent.is_poly = true;
+  ent.poly_type = entity_poly_type;
+  ent.branch_type = "";
+  ent.mon_ids = mon_ids;
+  for(const auto& mon_id: mon_ids) {  // by reference: no string copy per item
+    auto compound = compound_lib->FindCompound(mon_id, conop::Compound::PDB);
+    if(compound) {  // one letter codes rely on the compound library entry
+      char chem_class = compound->GetChemClass();
+      ent.seq_olcs.push_back(MonIDToOLC(chem_class, mon_id));
+      ent.seq_can_olcs.push_back(String(1, compound->GetOneLetterCode()));
+    } else {  // not in the library: "(mon_id)" goes into both sequences
+      ent.seq_olcs.push_back("(" + mon_id + ")");
+      ent.seq_can_olcs.push_back("(" + mon_id + ")");
+    }
+  }
+  return ent;
+}
+
 int MMCifWriterEntity::GetAsymIdx(const String& asym_id) const {
   for(size_t i = 0; i < asym_ids.size(); ++i) {
     if(asym_ids[i] == asym_id) {
diff --git a/modules/io/src/mol/mmcif_writer.hh b/modules/io/src/mol/mmcif_writer.hh
index e0ce06eaa..98c62c4be 100644
--- a/modules/io/src/mol/mmcif_writer.hh
+++ b/modules/io/src/mol/mmcif_writer.hh
@@ -22,6 +22,7 @@
 #include <fstream>
 
 #include <ost/mol/entity_handle.hh>
+#include <ost/conop/compound_lib.hh>
 
 #include <ost/io/mol/mmcif_info.hh>
 #include <ost/io/mol/io_profile.hh>
@@ -32,6 +33,12 @@ namespace ost { namespace io {
 
 struct MMCifWriterEntity {
 
+  MMCifWriterEntity(): is_poly(false) { }  // bool flag would otherwise be indeterminate
+
+  static MMCifWriterEntity FromPolymer(const String& entity_poly_type,  // validated; invalid value throws IOException
+                                       const std::vector<String>& mon_ids,  // copied into mon_ids, looked up one by one
+                                       conop::CompoundLibPtr compound_lib);  // source of the one letter codes
+
   int GetAsymIdx(const String& asym_id) const;
 
   // _entity.type
-- 
GitLab