From 944fd29a9a1d8d2466315a518738ec30666da35a Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Mon, 2 Jul 2012 11:33:45 +0200
Subject: [PATCH] Added bio unit id (pdbx_struct_assembly.id) to
 MMCifInfoBioUnit class. (Was forgotten before.)

---
 modules/io/doc/mmcif.rst              | 24 +++++++++++++++++++++---
 modules/io/pymod/export_mmcif_io.cc   |  3 +++
 modules/io/src/mol/mmcif_info.hh      | 15 ++++++++++++++-
 modules/io/src/mol/mmcif_reader.cc    |  7 ++++---
 modules/io/tests/test_mmcif_reader.cc |  2 ++
 5 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst
index 7e1c330a4..4b3245f2c 100644
--- a/modules/io/doc/mmcif.rst
+++ b/modules/io/doc/mmcif.rst
@@ -411,6 +411,14 @@ of the annotation available.
   This stores information how a structure is to be assembled to form the
   bio unit.
 
+  .. attribute:: id
+
+    The id of a bio unit as given by the original mmCIF file.
+
+    Also available as :meth:`GetID`. May also be modified by :meth:`SetID`.
+
+    :type: :class:`str`
+
   .. attribute:: details
 
     Special aspects of the biological assembly. If not provided, resembles an
@@ -434,6 +442,14 @@ of the annotation available.
     Also available as :meth:`GetOperations`. May be modified by
     :meth:`AddOperations`
 
+  .. method:: GetID()
+
+    See :attr:`id`
+
+  .. method:: SetID(id)
+
+    See :attr:`id`
+
   .. method:: GetDetails()
 
     See :attr:`details`
@@ -688,7 +704,7 @@ of the annotation available.
   link of polymers in the mmCIF file to sequences stored in external databases 
   such as uniprot. The related categories ``struct_ref_seq`` and 
   ``struct_ref_seq_dif`` also list differences between the sequences of the 
-  deposited structure and the sequences in the database. A promintent example of 
+  deposited structure and the sequences in the database. A prominent example of 
   such differences include point mutations and/or expression tags.
 
   .. attribute:: db_name
@@ -769,8 +785,10 @@ of the annotation available.
   .. attribute:: details
 
     A textual description of the difference, e.g. point mutation, 
-    expressiontag, purification artifact.
+    expression tag, purification artifact.
 
     :type: :class:`str`
+
 ..  LocalWords:  cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE
-..  LocalWords:  SPRSDE pdb func
+..  LocalWords:  SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif
+..  LocalWords:  biounits biounit uniprot UNP seqs
diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc
index 70e6b4659..c94c72a67 100644
--- a/modules/io/pymod/export_mmcif_io.cc
+++ b/modules/io/pymod/export_mmcif_io.cc
@@ -181,12 +181,15 @@ void export_mmcif_io()
     .def("AddOperations", &MMCifInfoBioUnit::AddOperations)
     .def("GetOperations", make_function(&MMCifInfoBioUnit::GetOperations,
                                    return_value_policy<copy_const_reference>()))
+    .def("SetID", &MMCifInfoBioUnit::SetID)
+    .def("GetID", &MMCifInfoBioUnit::GetID)
     .add_property("details", &MMCifInfoBioUnit::GetDetails,
                   &MMCifInfoBioUnit::SetDetails)
     .add_property("chains", make_function(&MMCifInfoBioUnit::GetChainList,
                                    return_value_policy<copy_const_reference>()))
     .add_property("operations", make_function(&MMCifInfoBioUnit::GetOperations,
                                    return_value_policy<copy_const_reference>()))
+    .add_property("id", &MMCifInfoBioUnit::GetID, &MMCifInfoBioUnit::SetID)
   ;
 
   class_<MMCifInfoStructRefs>("MMCifInfoStructRefs", init<>())
diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh
index 38e49f58b..b240e5a1c 100644
--- a/modules/io/src/mol/mmcif_info.hh
+++ b/modules/io/src/mol/mmcif_info.hh
@@ -238,7 +238,16 @@ typedef boost::shared_ptr<MMCifInfoTransOp> MMCifInfoTransOpPtr;
 class DLLEXPORT_OST_IO MMCifInfoBioUnit {
 public:
   /// \brief Create a biounit.
-  MMCifInfoBioUnit(): details_("") {};
+  MMCifInfoBioUnit(): id_(""), details_("") {};
+
+  /// \brief Set the id of the bio unit (pdbx_struct_assembly.id)
+  ///
+  /// \param id id of the bio unit as given in the mmCIF file
+  void SetID(String id) { id_ = id; }
+  /// \brief Get the id of the bio unit (pdbx_struct_assembly.id)
+  ///
+  /// \return id of the bio unit; an empty string unless one has been set
+  String GetID() const { return id_; }
 
   /// \brief Set details
   ///
@@ -274,6 +283,9 @@ public:
   }
 
   bool operator==(const MMCifInfoBioUnit& bu) const {
+    if (this->id_ != bu.id_) {
+      return false;
+    }
     if (this->details_ != bu.details_) {
       return false;
     }
@@ -314,6 +326,7 @@ public:
   }
 
 private:
+  String id_;                  ///< pdbx_struct_assembly.id
   String details_;             ///< pdbx_struct_assembly.details
   std::vector<String> chains_; ///< chains involved in this assembly
   std::vector<std::vector<MMCifInfoTransOpPtr> > operations_;
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 409902ab4..2bcdee810 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -975,7 +975,7 @@ void MMCifReader::ParsePdbxStructAssemblyGen(const std::vector<StringRef>& colum
   MMCifBioUAssembly assembly;
   assembly.biounit = MMCifInfoBioUnit();
 
-  assembly.biounit.SetDetails(columns[indices_[ASSEMBLY_ID]].str());
+  assembly.biounit.SetID(columns[indices_[ASSEMBLY_ID]].str());
 
   std::vector<StringRef> tmp_chains=columns[indices_[ASYM_ID_LIST]].split(',');
   std::vector<StringRef>::const_iterator tc_it;
@@ -1586,14 +1586,15 @@ void MMCifReader::OnEndData()
        bua_it != bu_assemblies_.end();
        ++bua_it) {
     // pair with pdbx_struct_assembly entry
-    buom_it = bu_origin_map_.find(bua_it->biounit.GetDetails());
+    buom_it = bu_origin_map_.find(bua_it->biounit.GetID());
     if (buom_it == bu_origin_map_.end()) {
       throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
                                                "No pdbx_struct_assembly.id '"+
-                                               bua_it->biounit.GetDetails() +
+                                               bua_it->biounit.GetID() +
                          "' found as requested by pdbx_struct_assembly_gen.")); 
     }
     bua_it->biounit.SetDetails(buom_it->second);
+    bua_it->biounit.SetID(buom_it->first);
 
     // pair with pdbx_struct_oper_list
     for (aol_it = bua_it->operations.begin();
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index 953ac3132..b17b22ea8 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -1254,9 +1254,11 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader)
   BOOST_MESSAGE("          done.");
 
   BOOST_MESSAGE("          reading data fields which should not fail...");
+  BOOST_MESSAGE(mmcif_p.GetInfo().GetBioUnits().back().GetID());
   BOOST_CHECK(mmcif_p.GetInfo().GetMethod().str() == "Deep-fry");
   BOOST_CHECK(mmcif_p.GetInfo().GetBioUnits().back().GetDetails() ==
               "author_defined_assembly");
+  BOOST_CHECK(mmcif_p.GetInfo().GetBioUnits().back().GetID() == "2");
   BOOST_CHECK(mmcif_p.GetInfo().GetBioUnits().back().GetChainList().back() ==
               "F");
   MMCifInfoBioUnit bu = mmcif_p.GetInfo().GetBioUnits().back();
-- 
GitLab