From cd7013478638b9330113b0897dbc6373dc228c7a Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Tue, 30 May 2023 12:17:32 +0200
Subject: [PATCH] read SEQRES by default in mmcif reader

Performance-wise this doesn't really make a difference, but properly
setting the IsLigand property depends on it.
---
 modules/io/pymod/__init__.py          |  6 +++---
 modules/io/src/mol/mmcif_reader.cc    | 20 +++++++++++++-------
 modules/io/src/mol/mmcif_reader.hh    |  2 +-
 modules/io/tests/test_mmcif_reader.cc |  1 +
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py
index 0f8131382..ca619a156 100644
--- a/modules/io/pymod/__init__.py
+++ b/modules/io/pymod/__init__.py
@@ -393,14 +393,15 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None,
                  interpreted as the pdb id.
   :type remote: :class:`bool`
 
-  :param seqres: Whether to read SEQRES records. If True, a
+  :param seqres: Whether to return SEQRES records. If True, a
                  :class:`~ost.seq.SequenceList` object is returned as the second
                  item. The sequences in the list are named according to the
                  mmCIF chain name.
                  This feature requires a default
                  :class:`compound library <ost.conop.CompoundLib>`
                  to be defined and accessible via
-                 :func:`~ost.conop.GetDefaultLib` or an empty list is returned.
+                 :func:`~ost.conop.GetDefaultLib`. One-letter codes of
+                 non-standard compounds are set to X otherwise.
   :type seqres: :class:`bool`
 
   :param info: Whether to return an info container with the other output.
@@ -434,7 +435,6 @@ def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None,
   try:
     ent = mol.CreateEntity()
     reader = MMCifReader(filename, ent, prof)
-    reader.read_seqres = seqres
     
     # NOTE: to speed up things, we could introduce a restrict_chains parameter
     #       similar to the one in LoadPDB. Here, it would have to be a list/set
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index e4ab24090..7f7966ce1 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -24,9 +24,14 @@
 #include <ost/dyn_cast.hh>
 #include <ost/mol/xcs_editor.hh>
 #include <ost/conop/conop.hh>
+#include <ost/conop/minimal_compound_lib.hh>
 
 #include <ost/io/mol/mmcif_reader.hh>
 
+
+
+#include <iostream>
+
 namespace ost { namespace io {
 
 
@@ -65,7 +70,7 @@ void MMCifReader::Init()
   curr_chain_           = mol::ChainHandle();
   curr_residue_         = mol::ResidueHandle();
   seqres_               = seq::CreateSequenceList();
-  read_seqres_          = false;
+  read_seqres_          = true;
   warned_rule_based_    = false;
   info_                 = MMCifInfo();
 }
@@ -742,15 +747,16 @@ void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns)
     } else if (indices_[PDBX_SEQ_ONE_LETTER_CODE] != -1) {
       seqres=columns[indices_[PDBX_SEQ_ONE_LETTER_CODE]];
 
-      conop::CompoundLibPtr comp_lib=conop::Conopology::Instance()
-                                            .GetDefaultLib();
+      conop::CompoundLibBasePtr comp_lib=conop::Conopology::Instance()
+                                                .GetDefaultLib();
       if (!comp_lib) {
         if (!warned_rule_based_) {
-          LOG_WARNING("SEQRES import requires a compound library. "
-                       "Ignoring SEQRES records");      
+          LOG_WARNING("SEQRES import requires a valid compound library to "
+                       "handle non standard compounds. Their One letter "
+                       "codes will be set to X.");      
         }
         warned_rule_based_=true;
-        return;
+        comp_lib = conop::CompoundLibBasePtr(new ost::conop::MinimalCompoundLib);
       }
       edm_it->second.seqres = this->ConvertSEQRES(seqres.str_no_whitespace(),
                                                   comp_lib);
@@ -763,7 +769,7 @@ void MMCifReader::ParseEntityPoly(const std::vector<StringRef>& columns)
 }
 
 String MMCifReader::ConvertSEQRES(const String& seqres, 
-                                  conop::CompoundLibPtr comp_lib)
+                                  conop::CompoundLibBasePtr comp_lib)
 {
   String can_seqres;
   for (String::const_iterator i=seqres.begin(), e=seqres.end(); i!=e; ++i) {
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index 3d10f03d4..1f21c3928 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -240,7 +240,7 @@ protected:
   /// not possible, however, since the PDB assigns multiple one letter codes 
   /// to some of the residues. To be consistent, we have to do the conversion
   /// on our own.
-  String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib);
+  String ConvertSEQRES(const String& seqres, conop::CompoundLibBasePtr compound_lib);
   /// \brief Fetch mmCIF citation_author information
   ///
   /// \param columns data row
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index bee89e0e2..e03eb0073 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -414,6 +414,7 @@ BOOST_AUTO_TEST_CASE(mmcif_entity_poly_tests)
   BOOST_TEST_MESSAGE("          testing type recognition...");
   {
     TestMMCifReaderProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh);
+    tmmcif_p.SetReadSeqRes(false);
     std::vector<StringRef> columns;
 
     // create corresponding entity entry
-- 
GitLab