From 491c71b4e8a7290464f8c5f82015ae0b2f2a4fa1 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Fri, 20 Jul 2012 15:10:26 +0200
Subject: [PATCH] (cherry-pick) Added PDB chain name to Cif chain name map.

Solved conflicts:

	modules/io/doc/mmcif.rst
	modules/io/pymod/export_mmcif_io.cc
	modules/io/src/mol/mmcif_info.cc
	modules/io/src/mol/mmcif_info.hh
	modules/io/tests/test_mmcif_info.cc
---
 modules/io/doc/mmcif.rst                      | 39 ++++++++++++++++++-
 modules/io/pymod/export_mmcif_io.cc           |  4 ++
 modules/io/src/mol/mmcif_info.cc              | 36 +++++++++++++++++
 modules/io/src/mol/mmcif_info.hh              | 27 +++++++++++++
 modules/io/src/mol/mmcif_reader.cc            |  4 ++
 modules/io/tests/test_mmcif_info.cc           | 12 +++++-
 .../io/tests/testfiles/mmcif/atom_site.mmcif  |  8 ++--
 7 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst
index e172b34a3..38934cc79 100644
--- a/modules/io/doc/mmcif.rst
+++ b/modules/io/doc/mmcif.rst
@@ -160,6 +160,42 @@ of the annotation available.
 
   .. method:: GetStructDetails()
 
+  .. method:: AddMMCifPDBChainTr(cif_chain_id, pdb_chain_id)
+
+    Set up a translation for a certain mmCIF chain name to the traditional PDB
+    chain name.
+
+    :param cif_chain_id: atom_site.label_asym_id
+    :type cif_chain_id: :class:`str`
+    :param pdb_chain_id: atom_site.auth_asym_id
+    :type pdb_chain_id: :class:`str`
+
+  .. method:: GetMMCifPDBChainTr(cif_chain_id)
+
+    Get the translation of a certain mmCIF chain name to the traditional PDB
+    chain name.
+
+    :param cif_chain_id: atom_site.label_asym_id
+    :type cif_chain_id: :class:`str`
+    :returns: atom_site.auth_asym_id as :class:`str`
+
+  .. method:: AddPDBMMCifChainTr(pdb_chain_id, cif_chain_id)
+
+    Set up a translation for a certain PDB chain name to the mmCIF chain name.
+
+    :param pdb_chain_id: atom_site.auth_asym_id
+    :type pdb_chain_id: :class:`str`
+    :param cif_chain_id: atom_site.label_asym_id
+    :type cif_chain_id: :class:`str`
+
+  .. method:: GetPDBMMCifChainTr(pdb_chain_id)
+
+    Get the translation of a certain PDB chain name to the mmCIF chain name.
+
+    :param pdb_chain_id: atom_site.auth_asym_id
+    :type pdb_chain_id: :class:`str`
+    :returns: atom_site.label_asym_id as :class:`str`
+
 .. class:: MMCifInfoCitation
 
   This stores citation information from an input file.
@@ -794,4 +830,5 @@ of the annotation available.
 
 ..  LocalWords:  cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE
 ..  LocalWords:  SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif
-..  LocalWords:  biounits biounit uniprot UNP seqs
+..  LocalWords:  biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym
+..  LocalWords:  auth GetMMCifPDBChainTr AddPDBMMCifChainTr GetPDBMMCifChainTr
diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc
index 6654393be..18ec3713b 100644
--- a/modules/io/pymod/export_mmcif_io.cc
+++ b/modules/io/pymod/export_mmcif_io.cc
@@ -280,6 +280,10 @@ void export_mmcif_io()
     .def("GetStructDetails", &MMCifInfo::GetStructDetails)
     .def("SetObsoleteInfo", &MMCifInfo::SetObsoleteInfo)
     .def("GetObsoleteInfo", &MMCifInfo::GetObsoleteInfo)
+    .def("AddMMCifPDBChainTr", &MMCifInfo::AddMMCifPDBChainTr)
+    .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr)
+    .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr)
+    .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr)
     .add_property("citations", make_function(&MMCifInfo::GetCitations,
                                    return_value_policy<copy_const_reference>()))
     .add_property("biounits", make_function(&MMCifInfo::GetBioUnits,
diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc
index c8565a2e8..29f0db746 100644
--- a/modules/io/src/mol/mmcif_info.cc
+++ b/modules/io/src/mol/mmcif_info.cc
@@ -22,6 +22,42 @@
 
 namespace ost { namespace io {
 
+void MMCifInfo::AddMMCifPDBChainTr(String cif, String pdb)
+{
+  // insert() keeps an existing entry and flags the collision via '.second'
+  std::pair<std::map<String, String>::iterator, bool> added =
+    cif_2_pdb_chain_id_.insert(std::make_pair(cif, pdb));
+  if (added.second) { return; }
+  throw IOException("mmCIF chain id '"+ cif +"' is already mapped to '"+
+                    added.first->second+"'.");
+}
+
+String MMCifInfo::GetMMCifPDBChainTr(String cif) const
+{
+  // mmCIF chain names without a translation yield the empty string
+  typedef std::map<String, String>::const_iterator ConstIt;
+  const ConstIt hit = cif_2_pdb_chain_id_.find(cif);
+  return (hit != cif_2_pdb_chain_id_.end()) ? hit->second : String("");
+}
+
+void MMCifInfo::AddPDBMMCifChainTr(String pdb, String cif)
+{
+  // insert() keeps an existing entry and flags the collision via '.second'
+  std::pair<std::map<String, String>::iterator, bool> added =
+    pdb_2_cif_chain_id_.insert(std::make_pair(pdb, cif));
+  if (added.second) { return; }
+  throw IOException("PDB chain id '"+ pdb +"' is already mapped to '"+
+                    added.first->second+"'.");
+}
+
+String MMCifInfo::GetPDBMMCifChainTr(String pdb) const
+{
+  // PDB chain names without a translation yield the empty string
+  typedef std::map<String, String>::const_iterator ConstIt;
+  const ConstIt hit = pdb_2_cif_chain_id_.find(pdb);
+  return (hit != pdb_2_cif_chain_id_.end()) ? hit->second : String("");
+}
+
 void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list)
 {
   // find citation
diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh
index b240e5a1c..3eebf54b2 100644
--- a/modules/io/src/mol/mmcif_info.hh
+++ b/modules/io/src/mol/mmcif_info.hh
@@ -22,6 +22,7 @@
 #include <vector>
 #include <map>
 #include <boost/shared_ptr.hpp>
+#include <ost/seq/sequence_list.hh>
 #include <ost/geom/geom.hh>
 #include <ost/string_ref.hh>
 #include <ost/io/module_config.hh>
@@ -771,6 +772,30 @@ public:
   /// \return experiment resolution
   Real GetResolution() const { return resolution_; }
 
+  /// \brief Add a new mmCIF/ PDB chain name tuple.
+  /// \throws IOException if \p cif already has a PDB chain name assigned
+  /// \param cif chain name as used by the mmCIF file (label_asym_id)
+  /// \param pdb chain name as used in the PDB file (auth_asym_id)
+  void AddMMCifPDBChainTr(String cif, String pdb);
+
+  /// \brief Get a PDB chain name for a CIF chain name
+  ///
+  /// \param cif chain name as used by the mmCIF file (label_asym_id)
+  /// \return chain name as used in the PDB file (auth_asym_id), "" if none
+  String GetMMCifPDBChainTr(String cif) const;
+
+  /// \brief Add a new PDB/ mmCIF chain name tuple.
+  /// \throws IOException if \p pdb already has a mmCIF chain name assigned
+  /// \param pdb chain name as used by the PDB file (auth_asym_id)
+  /// \param cif chain name as used in the mmCIF file (label_asym_id)
+  void AddPDBMMCifChainTr(String pdb, String cif);
+
+  /// \brief Get a CIF chain name for a PDB chain name
+  ///
+  /// \param pdb chain name as used by the PDB file (auth_asym_id)
+  /// \return chain name as used in the mmCIF file (label_asym_id), "" if none
+  String GetPDBMMCifChainTr(String pdb) const;
+
   /// \brief Add a biounit
   ///
   /// \param bu biounit to be added
@@ -848,6 +873,8 @@ private:
   std::vector<MMCifInfoBioUnit>  biounits_;   ///< list of biounits
   std::vector<MMCifInfoTransOpPtr> transops_;
 	MMCifInfoStructRefs            struct_refs_;
+  std::map<String, String> cif_2_pdb_chain_id_;
+  std::map<String, String> pdb_2_cif_chain_id_;
 };
 
 
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 2bcdee810..762939ca8 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -1539,6 +1539,7 @@ void MMCifReader::OnEndData()
   // process chain types
   std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css;
   MMCifEntityDescMap::const_iterator edm_it;
+  String pdb_auth_chain_name;
   for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) {
     edm_it = entity_desc_map_.find(css->second);
 
@@ -1548,6 +1549,9 @@ void MMCifReader::OnEndData()
       if (edm_it->second.seqres.length() > 0) {
         seqres_.AddSequence(seq::CreateSequence(css->first.GetName(),
                                                 edm_it->second.seqres));
+        pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name");
+        info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name);
+        info_.AddPDBMMCifChainTr(pdb_auth_chain_name, css->first.GetName());
       } else if (edm_it->second.type!=mol::CHAINTYPE_WATER) {
         // mark everything that doesn't have SEQRES as ligand and isn't of type 
         // water as ligand
diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc
index 3d2674797..65e4fdaec 100644
--- a/modules/io/tests/test_mmcif_info.cc
+++ b/modules/io/tests/test_mmcif_info.cc
@@ -202,7 +202,17 @@ BOOST_AUTO_TEST_CASE(mmcif_info)
   #else
   BOOST_CHECK_CLOSE(info.GetResolution(), 1.9f, 0.001f);
   #endif
- 
+
+  info.AddMMCifPDBChainTr("A", "B");
+  BOOST_CHECK_THROW(info.AddMMCifPDBChainTr("A", "B"), IOException);
+  BOOST_CHECK("B" == info.GetMMCifPDBChainTr("A"));
+  BOOST_CHECK("" == info.GetMMCifPDBChainTr("C"));
+
+  info.AddPDBMMCifChainTr("A", "B");
+  BOOST_CHECK_THROW(info.AddPDBMMCifChainTr("A", "B"), IOException);
+  BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A"));
+  BOOST_CHECK("" == info.GetPDBMMCifChainTr("C"));
+
   BOOST_MESSAGE("  done.");
 }
 
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
index ce890cf9d..68b2fbb57 100644
--- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -168,10 +168,10 @@ ATOM C  CB  ILE  A  13  . 1  21.236  34.463  16.492  1.00  22.67  .  13  21  ? A
 ATOM C  CG1 ILE  A  13  . 1  20.478  33.469  17.371  1.00  22.14  .  13  22  ? A
 ATOM C  CG2 ILE  A  13  . 1  21.357  33.986  15.016  1.00  21.75  .  13  23  ? A
 # - - - - data truncated for brevity - - - -
-HETATM C C1 APS  C  1  1  1  4.171  29.012   7.116  0.58  17.27  1 300  101 ? A
-HETATM C C2 APS  C  1  1  1  4.949  27.758   6.793  0.58  16.95  1 300  102 ? A
-HETATM O O3 APS  C  1  1  1  4.800  26.678   7.393  0.58  16.85  1 300  103 ? A
-HETATM N N4 APS  C  1  1  1  5.930  27.841   5.869  0.58  16.43  1 300  104 ? A
+HETATM C C1 APS  C  1  1  1  4.171  29.012   7.116  0.58  17.27  1 300  101 ? C
+HETATM C C2 APS  C  1  1  1  4.949  27.758   6.793  0.58  16.95  1 300  102 ? C
+HETATM O O3 APS  C  1  1  1  4.800  26.678   7.393  0.58  16.85  1 300  103 ? C
+HETATM N N4 APS  C  1  1  1  5.930  27.841   5.869  0.58  16.43  1 300  104 ? C
 # - - - - data truncated for brevity - - - -
 # chain to be ignored by 'restrict_chains' feature
 ATOM N  N   ILE  Z  1  . 1  23.664  33.855  16.884  1.00  22.08  .  1  17  ? Z
-- 
GitLab