From 1a7c3f9f8cb27e8423bcc4eaeb2b7dbfb8fb3ed2 Mon Sep 17 00:00:00 2001
From: Gerardo Tauriello <gerardo.tauriello@unibas.ch>
Date: Fri, 28 Jul 2017 17:21:48 +0200
Subject: [PATCH] Added new ChainTypes to mmCIF parser

---
 modules/io/doc/mmcif.rst                |  2 +-
 modules/mol/base/src/chain_type.cc      | 62 +++++++++++++++----------
 modules/mol/base/src/chain_type.hh      |  6 ++-
 modules/mol/base/src/impl/chain_impl.hh |  6 ++-
 modules/mol/base/tests/test_chain.cc    | 28 +++++++++++
 5 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst
index bed692ff4..0098cd503 100644
--- a/modules/io/doc/mmcif.rst
+++ b/modules/io/doc/mmcif.rst
@@ -669,7 +669,7 @@ of the annotation available.
 
     Since this function is at the moment mainly used to create biounits from
     mmCIF files to be saved as PDBs, the function assumes that the
-    :class:`ChainType` properties are set correctly. 
+    :class:`~ost.mol.ChainType` properties are set correctly.
 
     :param asu:  Asymmetric unit to work on. Should be created from a mmCIF
                  file.
diff --git a/modules/mol/base/src/chain_type.cc b/modules/mol/base/src/chain_type.cc
index 2c6c091b9..0c292fb8d 100644
--- a/modules/mol/base/src/chain_type.cc
+++ b/modules/mol/base/src/chain_type.cc
@@ -26,30 +26,36 @@ ChainType ChainTypeFromString(StringRef identifier)
 {
 
   // chain types as found in the entity category of a mmcif file
-  if(StringRef("polymer", 7) == identifier) {
-      return CHAINTYPE_POLY;
-    }else if(StringRef("non-polymer", 11) == identifier) {
-      return CHAINTYPE_NON_POLY;
-    }else if(StringRef("water", 5) == identifier) {
-      return CHAINTYPE_WATER;
+  if (StringRef("polymer", 7) == identifier) {
+    return CHAINTYPE_POLY;
+  } else if (StringRef("non-polymer", 11) == identifier) {
+    return CHAINTYPE_NON_POLY;
+  } else if (StringRef("water", 5) == identifier) {
+    return CHAINTYPE_WATER;
+  } else if (StringRef("macrolide", 9) == identifier) {
+    return CHAINTYPE_MACROLIDE;
   // chain types as found in the entity_poly category of a mmcif file
-    } else if(StringRef("polypeptide(D)", 14) == identifier) {
-      return CHAINTYPE_POLY_PEPTIDE_D;
-    } else if(StringRef("polypeptide(L)", 14) == identifier) {
-      return CHAINTYPE_POLY_PEPTIDE_L;
-    } else if(StringRef("polydeoxyribonucleotide", 23) == identifier) {
-      return CHAINTYPE_POLY_DN;
-    } else if(StringRef("polyribonucleotide", 18) == identifier) {
-      return CHAINTYPE_POLY_RN;
-    } else if(StringRef("polysaccharide(D)", 17) == identifier) {
-      return CHAINTYPE_POLY_SAC_D;
-    } else if(StringRef("polysaccharide(L)", 17) == identifier) {
-      return CHAINTYPE_POLY_SAC_L;
-    } else if(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
+  } else if (StringRef("polypeptide(D)", 14) == identifier) {
+    return CHAINTYPE_POLY_PEPTIDE_D;
+  } else if (StringRef("polypeptide(L)", 14) == identifier) {
+    return CHAINTYPE_POLY_PEPTIDE_L;
+  } else if (StringRef("polydeoxyribonucleotide", 23) == identifier) {
+    return CHAINTYPE_POLY_DN;
+  } else if (StringRef("polyribonucleotide", 18) == identifier) {
+    return CHAINTYPE_POLY_RN;
+  } else if (StringRef("polysaccharide(D)", 17) == identifier) {
+    return CHAINTYPE_POLY_SAC_D;
+  } else if (StringRef("polysaccharide(L)", 17) == identifier) {
+    return CHAINTYPE_POLY_SAC_L;
+  } else if (StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid",
                         49) == identifier) {
-      return CHAINTYPE_POLY_DN_RN;
-  } else if(StringRef("other", 5) == identifier) {
-      return CHAINTYPE_UNKNOWN;
+    return CHAINTYPE_POLY_DN_RN;
+  } else if (StringRef("cyclic-pseudo-peptide", 21) == identifier) {
+    return CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE;
+  } else if (StringRef("peptide nucleic acid", 20) == identifier) {
+    return CHAINTYPE_POLY_PEPTIDE_DN_RN;
+  } else if (StringRef("other", 5) == identifier) {
+    return CHAINTYPE_UNKNOWN;
   }
 
   throw Error("Unrecognised chain type descriptor found: '" +
@@ -69,10 +75,12 @@ String StringFromChainType(ChainType type)
     return "polymer";
   } else if (CHAINTYPE_NON_POLY == type) {
     return "non-polymer";
-  }  else if (CHAINTYPE_WATER == type) {
+  } else if (CHAINTYPE_WATER == type) {
     return "water";
+  } else if (CHAINTYPE_MACROLIDE == type) {
+    return "macrolide";
   // chain types as found in the entity_poly category of a mmcif file
-  }  else if (CHAINTYPE_POLY_PEPTIDE_D == type) {
+  } else if (CHAINTYPE_POLY_PEPTIDE_D == type) {
     return "polypeptide(D)";
   } else if (CHAINTYPE_POLY_PEPTIDE_L == type) {
     return "polypeptide(L)";
@@ -86,11 +94,15 @@ String StringFromChainType(ChainType type)
     return "polysaccharide(L)";
   } else if (CHAINTYPE_POLY_DN_RN == type) {
     return "polydeoxyribonucleotide/polyribonucleotide hybrid";
+  } else if (CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE == type) {
+    return "cyclic-pseudo-peptide";
+  } else if (CHAINTYPE_POLY_PEPTIDE_DN_RN == type) {
+    return "peptide nucleic acid";
   } else if (CHAINTYPE_UNKNOWN == type) {
     return "other";
   }
 
-  std::stringstream ss("Unknonw ChainType item found: '");
+  std::stringstream ss; ss << "Unknown ChainType item found: '";
   ss << type << "'!";
   throw Error(ss.str());
 }
diff --git a/modules/mol/base/src/chain_type.hh b/modules/mol/base/src/chain_type.hh
index 82d9115f9..a195a4042 100644
--- a/modules/mol/base/src/chain_type.hh
+++ b/modules/mol/base/src/chain_type.hh
@@ -38,8 +38,12 @@ typedef enum {
   CHAINTYPE_POLY_RN,        ///< polyribonucleotide
   CHAINTYPE_POLY_SAC_D,     ///< polysaccharide(D)
   CHAINTYPE_POLY_SAC_L,     ///< polysaccharide(L)
-  CHAINTYPE_POLY_DN_RN,     ///<polydeoxyribonucleotide/ -ribonucleotide hybrid
+  CHAINTYPE_POLY_DN_RN,     ///< polydeoxyribonucleotide/ -ribonucleotide hybrid
   CHAINTYPE_UNKNOWN,        ///< guess what
+  // appended after the existing types, whose enumerator values stay unchanged
+  CHAINTYPE_MACROLIDE,              ///< macrolide
+  CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE,  ///< cyclic-pseudo-peptide
+  CHAINTYPE_POLY_PEPTIDE_DN_RN,     ///< peptide nucleic acid
   CHAINTYPE_N_CHAINTYPES    ///< no. of chain types
 } ChainType;
 
diff --git a/modules/mol/base/src/impl/chain_impl.hh b/modules/mol/base/src/impl/chain_impl.hh
index 82334c6fb..b6a4a34a6 100644
--- a/modules/mol/base/src/impl/chain_impl.hh
+++ b/modules/mol/base/src/impl/chain_impl.hh
@@ -68,7 +68,8 @@ public:
   bool IsPolymer() const
   {
     return type_==CHAINTYPE_POLY || this->IsPolypeptide() || 
-          this->IsPolynucleotide() || this->IsPolysaccharide();
+           this->IsPolynucleotide() || this->IsPolysaccharide() ||
+           type_==CHAINTYPE_POLY_PEPTIDE_DN_RN;
   }
   /// \brief whether the chain is a polysaccharide
   bool IsPolysaccharide() const
@@ -78,7 +79,8 @@ public:
   /// \brief whether the chain is a polypeptide
   bool IsPolypeptide() const
   {
-    return type_==CHAINTYPE_POLY_PEPTIDE_D || type_==CHAINTYPE_POLY_PEPTIDE_L;
+    return type_==CHAINTYPE_POLY_PEPTIDE_D || type_==CHAINTYPE_POLY_PEPTIDE_L ||
+           type_==CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE;
   }
   /// \brief whether the chain is a polynucleotide
   bool IsPolynucleotide() const
diff --git a/modules/mol/base/tests/test_chain.cc b/modules/mol/base/tests/test_chain.cc
index c857cce09..8f6080401 100644
--- a/modules/mol/base/tests/test_chain.cc
+++ b/modules/mol/base/tests/test_chain.cc
@@ -302,6 +302,24 @@ BOOST_AUTO_TEST_CASE(chain_type)
    BOOST_CHECK(ch1.GetType() == CHAINTYPE_N_CHAINTYPES);
    e.SetChainType(ch1, CHAINTYPE_UNKNOWN);
    BOOST_CHECK(ch1.GetType() == CHAINTYPE_UNKNOWN);
+   e.SetChainType(ch1, CHAINTYPE_MACROLIDE);
+   BOOST_CHECK(ch1.GetType() == CHAINTYPE_MACROLIDE);
+   BOOST_CHECK(!ch1.IsPolymer());
+   BOOST_CHECK(!ch1.IsPolysaccharide());
+   BOOST_CHECK(!ch1.IsPolypeptide());
+   BOOST_CHECK(!ch1.IsPolynucleotide());
+   e.SetChainType(ch1, CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE);
+   BOOST_CHECK(ch1.GetType() == CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE);
+   BOOST_CHECK(ch1.IsPolymer());
+   BOOST_CHECK(!ch1.IsPolysaccharide());
+   BOOST_CHECK(ch1.IsPolypeptide());
+   BOOST_CHECK(!ch1.IsPolynucleotide());
+   e.SetChainType(ch1, CHAINTYPE_POLY_PEPTIDE_DN_RN);
+   BOOST_CHECK(ch1.GetType() == CHAINTYPE_POLY_PEPTIDE_DN_RN);
+   BOOST_CHECK(ch1.IsPolymer());
+   BOOST_CHECK(!ch1.IsPolysaccharide());
+   BOOST_CHECK(!ch1.IsPolypeptide());
+   BOOST_CHECK(!ch1.IsPolynucleotide());
 
    // string -> chain type
    BOOST_CHECK(ChainTypeFromString("polymer") == CHAINTYPE_POLY);
@@ -323,6 +341,11 @@ BOOST_AUTO_TEST_CASE(chain_type)
                       "polydeoxyribonucleotide/polyribonucleotide hybrid") ==
                CHAINTYPE_POLY_DN_RN);
    BOOST_CHECK(ChainTypeFromString("other") == CHAINTYPE_UNKNOWN);
+   BOOST_CHECK(ChainTypeFromString("macrolide") == CHAINTYPE_MACROLIDE);
+   BOOST_CHECK(ChainTypeFromString("cyclic-pseudo-peptide") ==
+               CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE);
+   BOOST_CHECK(ChainTypeFromString("peptide nucleic acid") ==
+               CHAINTYPE_POLY_PEPTIDE_DN_RN);
    BOOST_CHECK_THROW(ChainTypeFromString("supposed to fail"),
                      Error);
 
@@ -344,6 +367,11 @@ BOOST_AUTO_TEST_CASE(chain_type)
    BOOST_CHECK(StringFromChainType(CHAINTYPE_POLY_DN_RN) ==
                "polydeoxyribonucleotide/polyribonucleotide hybrid");
    BOOST_CHECK(StringFromChainType(CHAINTYPE_UNKNOWN) == "other");
+   BOOST_CHECK(StringFromChainType(CHAINTYPE_MACROLIDE) == "macrolide");
+   BOOST_CHECK(StringFromChainType(CHAINTYPE_CYCLIC_PSEUDO_PEPTIDE) ==
+               "cyclic-pseudo-peptide");
+   BOOST_CHECK(StringFromChainType(CHAINTYPE_POLY_PEPTIDE_DN_RN) ==
+               "peptide nucleic acid");
    BOOST_CHECK_THROW(StringFromChainType(CHAINTYPE_N_CHAINTYPES),
                      Error);
 }
-- 
GitLab