From b7f87fa4c44db5a8e19a435492313a912365f584 Mon Sep 17 00:00:00 2001
From: Marco Biasini <marco.biasini@unibas.ch>
Date: Tue, 8 May 2012 14:32:56 +0200
Subject: [PATCH] added support for struct_ref, and friends

The mmCIF reader now also parses struct_ref_seq and
struct_ref_seq_dif. Includes unit tests and docs
---
 modules/io/doc/mmcif.rst                      |  99 ++++++++++++-
 modules/io/pymod/export_mmcif_io.cc           |  43 +++++-
 modules/io/src/mol/mmcif_info.cc              |  36 +++++
 modules/io/src/mol/mmcif_info.hh              |  92 +++++++++++-
 modules/io/src/mol/mmcif_reader.cc            | 136 +++++++++++++++++-
 modules/io/src/mol/mmcif_reader.hh            |  41 +++++-
 modules/io/tests/test_mmcif_reader.cc         |  34 +++++
 .../io/tests/testfiles/mmcif/struct_ref.cif   |  47 ++++++
 8 files changed, 522 insertions(+), 6 deletions(-)
 create mode 100644 modules/io/tests/testfiles/mmcif/struct_ref.cif

diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst
index 078a66667..7e1c330a4 100644
--- a/modules/io/doc/mmcif.rst
+++ b/modules/io/doc/mmcif.rst
@@ -38,6 +38,9 @@ The following categories of a mmCIF file are considered by the reader:
   the :class:`entity <ost.mol.EntityHandle>`
 * ``pdbx_database_PDB_obs_spr``: Verbose information on obsoleted/ superseded
   entries, stored in :class:`MMCifInfoObsolete`.
+* ``struct_ref`` stored in :class:`MMCifInfoStructRef`
+* ``struct_ref_seq`` stored in :class:`MMCifInfoStructRefSeq`
+* ``struct_ref_seq_dif`` stored in :class:`MMCifInfoStructRefSeqDif`
 
 Info Classes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -93,6 +96,9 @@ of the annotation available.
     Also available as :meth:`GetStructDetails`. May also be modified by
     :meth:`SetStructDetails`.
 
+  .. attribute:: struct_refs
+
+    Lists all links to external databases in the mmCIF file.
   .. method:: AddCitation(citation)
 
     Add a citation to the citation list of an info object.
@@ -452,7 +458,7 @@ of the annotation available.
 
     See :attr:`operations`
 
-  .. method:: PDBize(asu, seqres=None, min_polymer_size=10)
+.. function:: PDBize(asu, seqres=None, min_polymer_size=10)
 
     Returns the biological assembly (bio unit) for an entity. The new entity
     created is well suited to be saved as a PDB file. Therefore the function
@@ -676,6 +682,95 @@ of the annotation available.
 
     See :attr:`replace_pdb_id`
 
-..  LocalWords:  autofunction ChainTypes exptl attr pdbx oper conf spr biounits
+.. class:: MMCifInfoStructRef
+
+  Holds the information of the struct_ref category. The category describes the 
+  link of polymers in the mmCIF file to sequences stored in external databases 
+  such as uniprot. The related categories ``struct_ref_seq`` and 
+  ``struct_ref_seq_dif`` also list differences between the sequences of the 
+  deposited structure and the sequences in the database. Prominent examples of 
+  such differences include point mutations and/or expression tags.
+
+  .. attribute:: db_name
+
+    
+    Name of the external database, for example UNP for uniprot.
+
+    :type: :class:`str`
+
+
+  .. attribute:: db_id
+    
+    Name of the reference sequence in the database pointed to by :attr:`db_name`.
+
+    :type: :class:`str`
+  
+  .. attribute:: db_access
+    
+    Alternative accession code for the sequence in the database pointed to by 
+    :attr:`db_name`.
+
+    :type: :class:`str`
+
+  .. method:: GetAlignedSeq(align_id)
+
+    Returns the aligned sequence for the given ``align_id``, or None if no 
+    such sequence exists.
+  
+  .. attribute:: aligned_seqs
+
+    List of aligned sequences (all entries of the struct_ref_seq category 
+    mapping to this struct_ref).
+
+.. class:: MMCifInfoStructRefSeq
+
+  An aligned range of residues between a sequence in a reference database and the 
+  deposited sequence.
+
+  .. attribute:: align_id
+    
+    Uniquely identifies every struct_ref_seq item in the mmCIF file.
+    :type: :class:`str`
+
+  .. attribute:: seq_begin
+                 seq_end
+
+    Start and end (1-based) of the aligned range in the deposited sequence, respectively.
+
+    :type: :class:`int`
+
+  .. attribute:: db_begin
+                 db_end
+
+    Start and end (1-based) of the aligned range in the database sequence, respectively.
+
+    :type: :class:`int`
+
+  .. attribute:: difs
+
+    List of differences between the deposited sequence and the sequence in the 
+    database.
+
+  .. attribute:: chain_name
+
+    Chain name of the polymer in the mmCIF file.
+
+.. class:: MMCifInfoStructRefSeqDif
+
+  A particular difference between the deposited sequence and the sequence in 
+  the database.
+
+  .. attribute:: rnum
+
+    The residue number (1-based) of the residue in the deposited sequence.
+   
+    :type: :class:`int`
+
+  .. attribute:: details
+
+    A textual description of the difference, e.g. point mutation, 
+    expression tag, purification artifact.
+
+    :type: :class:`str`
 ..  LocalWords:  cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE
 ..  LocalWords:  SPRSDE pdb func
diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc
index b502aa515..908886257 100644
--- a/modules/io/pymod/export_mmcif_io.cc
+++ b/modules/io/pymod/export_mmcif_io.cc
@@ -138,7 +138,37 @@ void export_mmcif_io()
                                                 init<>())
     .def(vector_indexing_suite<std::vector<MMCifInfoTransOpPtrList >, true >())
   ;
-
+  class_<MMCifInfoStructRef, MMCifInfoStructRefPtr>("MMCifInfoStructRef", no_init)
+  	.add_property("db_name", make_function(&MMCifInfoStructRef::GetDBName, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("db_id", make_function(&MMCifInfoStructRef::GetDBID, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("entity_id", make_function(&MMCifInfoStructRef::GetEntityID, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("db_access", make_function(&MMCifInfoStructRef::GetDBAccess, 
+  				        return_value_policy<copy_const_reference>()))
+  	.def("GetAlignedSeq", &MMCifInfoStructRef::GetAlignedSeq, arg("align_id"))
+  	.def("GetAlignedSeqs", &MMCifInfoStructRef::GetAlignedSeqs)
+  	.add_property("aligned_seqs", &MMCifInfoStructRef::GetAlignedSeqs)
+ ; 
+  class_<MMCifInfoStructRefSeq, MMCifInfoStructRefSeqPtr>("MMCifInfoStructRefSeq", no_init)
+  	.add_property("align_id", make_function(&MMCifInfoStructRefSeq::GetID, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("chain_name", make_function(&MMCifInfoStructRefSeq::GetChainName, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("seq_begin", &MMCifInfoStructRefSeq::GetSeqBegin)
+  	.add_property("seq_end", &MMCifInfoStructRefSeq::GetSeqEnd)
+  	.add_property("db_begin", &MMCifInfoStructRefSeq::GetDBBegin)
+  	.add_property("db_end", &MMCifInfoStructRefSeq::GetDBEnd)
+  	.add_property("difs", make_function(&MMCifInfoStructRefSeq::GetDifs,
+  				        return_value_policy<copy_const_reference>()))
+  ;
+  class_<MMCifInfoStructRefSeqDif, 
+  	     MMCifInfoStructRefSeqDifPtr>("MMCifInfoStructRefSeqDif", no_init)
+  	.add_property("details", make_function(&MMCifInfoStructRefSeqDif::GetDetails, 
+  				        return_value_policy<copy_const_reference>()))
+  	.add_property("rnum", &MMCifInfoStructRefSeqDif::GetRNum)
+  ;
   class_<MMCifInfoBioUnit>("MMCifInfoBioUnit", init<>())
     .def("SetDetails", &MMCifInfoBioUnit::SetDetails)
     .def("GetDetails", &MMCifInfoBioUnit::GetDetails)
@@ -156,6 +186,15 @@ void export_mmcif_io()
                                    return_value_policy<copy_const_reference>()))
   ;
 
+  class_<MMCifInfoStructRefs>("MMCifInfoStructRefs", init<>())
+  	.def(vector_indexing_suite<MMCifInfoStructRefs, true>())
+  ;
+  class_<MMCifInfoStructRefSeqs>("MMCifInfoStructRefSeqs", init<>())
+  	.def(vector_indexing_suite<MMCifInfoStructRefSeqs, true>())
+  ;
+  class_<MMCifInfoStructRefSeqDifs>("MMCifInfoStructRefSeqDifs", init<>())
+  	.def(vector_indexing_suite<MMCifInfoStructRefSeqDifs, true>())
+  ;
   typedef std::vector<MMCifInfoBioUnit> MMCifInfoBioUnitList;
   class_<std::vector<MMCifInfoBioUnit> >("MMCifInfoBioUnitList", init<>())
     .def(vector_indexing_suite<std::vector<MMCifInfoBioUnit> >())
@@ -246,6 +285,8 @@ void export_mmcif_io()
                                    return_value_policy<copy_const_reference>()))
     .add_property("struct_details", &MMCifInfo::GetStructDetails,
                   &MMCifInfo::SetStructDetails)
+    .add_property("struct_refs", make_function(&MMCifInfo::GetStructRefs,
+    			        return_value_policy<copy_const_reference>()))
     .add_property("obsolete", &MMCifInfo::GetObsoleteInfo,
                   &MMCifInfo::SetObsoleteInfo)
  ;
diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc
index 1e4c89281..d99b0bd22 100644
--- a/modules/io/src/mol/mmcif_info.cc
+++ b/modules/io/src/mol/mmcif_info.cc
@@ -36,4 +36,40 @@ void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list)
   throw IOException("No citation for identifier '" + id.str() + "' found.");
 }
 
+
+
+/// \brief add an aligned range, throws IOException on duplicate align_id
+MMCifInfoStructRefSeqPtr
+MMCifInfoStructRef::AddAlignedSeq(const String& aid, const String& chain_name,
+                                  int seq_begin, int seq_end, int db_begin,
+                                  int db_end)
+{
+  if (seqs_.find(aid)!=seqs_.end()) {
+    throw IOException("duplicate align_id for struct_ref '"+id_+"'");
+  }
+  MMCifInfoStructRefSeqPtr aligned(new MMCifInfoStructRefSeq(aid, chain_name,
+                                                             seq_begin, seq_end,
+                                                             db_begin, db_end));
+  seqs_[aid]=aligned;
+  return aligned;
+}
+
+
+/// \brief aligned sequence stored under aid, null pointer if there is none
+MMCifInfoStructRefSeqPtr
+MMCifInfoStructRef::GetAlignedSeq(const String& aid) const
+{
+  std::map<String, MMCifInfoStructRefSeqPtr>::const_iterator hit=seqs_.find(aid);
+  return hit!=seqs_.end() ? hit->second : MMCifInfoStructRefSeqPtr();
+}
+
+/// \brief append a sequence difference to this alignment and return it
+MMCifInfoStructRefSeqDifPtr
+MMCifInfoStructRefSeq::AddDif(int rnum, const String& details)
+{
+  MMCifInfoStructRefSeqDifPtr dif(new MMCifInfoStructRefSeqDif(rnum, details));
+  difs_.push_back(dif);
+  return dif;
+}
+
 }} //ns
diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh
index 469e52188..81d5b546a 100644
--- a/modules/io/src/mol/mmcif_info.hh
+++ b/modules/io/src/mol/mmcif_info.hh
@@ -20,6 +20,7 @@
 #define OST_MMCIF_INFO_HH
 
 #include <vector>
+#include <map>
 #include <boost/shared_ptr.hpp>
 #include <ost/geom/geom.hh>
 #include <ost/string_ref.hh>
@@ -605,6 +606,92 @@ private:
   String replaced_pdb_id_; ///< replaced entry
 };
 
+class MMCifInfoStructRef;
+class MMCifInfoStructRefSeq;
+class MMCifInfoStructRefSeqDif;
+
+
+typedef boost::shared_ptr<MMCifInfoStructRef> MMCifInfoStructRefPtr;
+typedef boost::shared_ptr<MMCifInfoStructRefSeq> MMCifInfoStructRefSeqPtr;
+typedef boost::shared_ptr<MMCifInfoStructRefSeqDif> MMCifInfoStructRefSeqDifPtr;
+
+typedef std::vector<MMCifInfoStructRefPtr> MMCifInfoStructRefs;
+typedef std::vector<MMCifInfoStructRefSeqPtr> MMCifInfoStructRefSeqs;
+typedef std::vector<MMCifInfoStructRefSeqDifPtr> MMCifInfoStructRefSeqDifs;
+/// \brief link of an entity to an external sequence database (struct_ref)
+class DLLEXPORT_OST_IO MMCifInfoStructRef {
+public:
+  MMCifInfoStructRef(const String& id, const String& ent_id,
+                     const String& db_name, const String& db_ident,
+                     const String& db_access):
+    id_(id), ent_id_(ent_id), db_name_(db_name), db_ident_(db_ident),
+    db_access_(db_access) { }
+  const String& GetID() const { return id_; }
+  const String& GetDBName() const { return db_name_; }
+  const String& GetDBID() const { return db_ident_; }
+  const String& GetEntityID() const { return ent_id_; }
+  const String& GetDBAccess() const { return db_access_; }
+  MMCifInfoStructRefSeqPtr AddAlignedSeq(const String& align_id,
+                                         const String& chain_name, int seq_begin,
+                                         int seq_end, int db_begin, int db_end);
+  MMCifInfoStructRefSeqPtr GetAlignedSeq(const String& align_id) const;
+  MMCifInfoStructRefSeqs GetAlignedSeqs() const
+  {
+    MMCifInfoStructRefSeqs result;
+    result.reserve(seqs_.size());
+    std::map<String, MMCifInfoStructRefSeqPtr>::const_iterator it;
+    for (it=seqs_.begin(); it!=seqs_.end(); ++it) {
+      result.push_back(it->second);
+    }
+    return result;
+  }
+private:
+  String  id_;         ///< struct_ref.id
+  String  ent_id_;     ///< struct_ref.entity_id
+  String  db_name_;    ///< struct_ref.db_name
+  String  db_ident_;   ///< struct_ref.db_code
+  String  db_access_;  ///< struct_ref.pdbx_db_accession
+  std::map<String, MMCifInfoStructRefSeqPtr> seqs_; ///< keyed by align_id
+};
+
+/// \brief aligned residue range of a struct_ref (struct_ref_seq category)
+class DLLEXPORT_OST_IO MMCifInfoStructRefSeq {
+public:
+  MMCifInfoStructRefSeq(const String& align_id, const String& chain_name,
+                        int seq_begin, int seq_end, int db_begin, int db_end):
+    id_(align_id), chain_name_(chain_name), seq_begin_(seq_begin),
+    seq_end_(seq_end), db_begin_(db_begin), db_end_(db_end)
+  { }
+
+  const String& GetID() const { return id_; }
+  const String& GetChainName() const { return chain_name_; }
+  int GetSeqBegin() const { return seq_begin_; }
+  int GetSeqEnd() const { return seq_end_; }
+  int GetDBBegin() const { return db_begin_; }
+  int GetDBEnd() const { return db_end_; }
+  MMCifInfoStructRefSeqDifPtr AddDif(int seq_num, const String& details);
+  const std::vector<MMCifInfoStructRefSeqDifPtr>& GetDifs() const { return difs_; }
+private:
+  String   id_;          ///< struct_ref_seq.align_id
+  String   chain_name_;  ///< struct_ref_seq.pdbx_strand_id
+  int      seq_begin_;   ///< struct_ref_seq.seq_align_beg
+  int      seq_end_;     ///< struct_ref_seq.seq_align_end
+  int      db_begin_;    ///< struct_ref_seq.db_align_beg
+  int      db_end_;      ///< struct_ref_seq.db_align_end
+  MMCifInfoStructRefSeqDifs difs_; ///< differences added through AddDif
+};
+
+class DLLEXPORT_OST_IO MMCifInfoStructRefSeqDif {
+public:
+	MMCifInfoStructRefSeqDif(int rnum, const String& details): 
+		rnum_(rnum), details_(details) {}
+	int GetRNum() const { return rnum_;}
+	const String& GetDetails() const { return details_; }
+private:
+	int    rnum_;    ///< struct_ref_seq_dif.seq_num
+	String details_; ///< struct_ref_seq_dif.details
+};
+
 /// \brief container class for additional information from MMCif files
 /// 
 /// \section mmcif annotation information
@@ -731,7 +818,8 @@ public:
   {
     return obsolete_;
   }
-
+  const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
+  void SetStructRefs(const MMCifInfoStructRefs& sr) { struct_refs_=sr; }
 //protected:
 
 private:
@@ -743,8 +831,10 @@ private:
   std::vector<MMCifInfoCitation> citations_;  ///< list of citations
   std::vector<MMCifInfoBioUnit>  biounits_;   ///< list of biounits
   std::vector<MMCifInfoTransOpPtr> transops_;
+	MMCifInfoStructRefs            struct_refs_;
 };
 
+
 }} // ns
 
 #endif
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
index 09fa8b6ef..f2b66ee35 100644
--- a/modules/io/src/mol/mmcif_reader.cc
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -300,7 +300,31 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header)
     this->TryStoreIdx(PDB_ID,         "pdb_id", header);
     this->TryStoreIdx(REPLACE_PDB_ID, "replace_pdb_id", header);
     cat_available = true;
-  }
+  } else if (header.GetCategory() == "struct_ref") {
+  	category_ = STRUCT_REF;
+  	this->TryStoreIdx(SR_ENTITY_ID, "entity_id", header);
+  	this->TryStoreIdx(SR_ID, "id", header);
+  	this->TryStoreIdx(SR_DB_NAME, "db_name", header);
+  	this->TryStoreIdx(SR_DB_CODE, "db_code", header);
+  	indices_[SR_DB_ACCESS]=header.GetIndex("pdbx_db_accession");
+  	cat_available = true;
+	} else if (header.GetCategory() == "struct_ref_seq") {
+	  category_ = STRUCT_REF_SEQ;	
+  	this->TryStoreIdx(SRS_ALIGN_ID, "align_id", header);
+  	this->TryStoreIdx(SRS_STRUCT_REF_ID, "ref_id", header);
+  	this->TryStoreIdx(SRS_ENT_ALIGN_BEG, "seq_align_beg", header);
+  	this->TryStoreIdx(SRS_ENT_ALIGN_END, "seq_align_end", header);
+  	this->TryStoreIdx(SRS_DB_ALIGN_BEG, "db_align_beg", header);
+  	this->TryStoreIdx(SRS_DB_ALIGN_END, "db_align_end", header);
+    indices_[SRS_PDBX_STRAND_ID]=header.GetIndex("pdbx_strand_id");
+	  cat_available = true;
+	} else if (header.GetCategory()=="struct_ref_seq_dif") {
+		category_ = STRUCT_REF_SEQ_DIF;
+  	this->TryStoreIdx(SRSD_ALIGN_ID, "align_id", header);
+  	this->TryStoreIdx(SRSD_RNUM, "seq_num", header);
+  	indices_[SRSD_DETAILS]=header.GetIndex("details");
+  	cat_available = true;
+	}
   category_counts_[category_]++;
   return cat_available;
 }
@@ -1328,6 +1352,18 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header,
     LOG_TRACE("processing pdbx_database_PDB_obs_spr entry")
     this->ParsePdbxDatabasePdbObsSpr(columns);
     break;
+  case STRUCT_REF:
+  	LOG_TRACE("processing struct_ref entry");
+  	this->ParseStructRef(columns);
+  	break;
+  case STRUCT_REF_SEQ:
+    LOG_TRACE("processing struct_ref_seq entry");
+    this->ParseStructRefSeq(columns);
+    break;
+  case STRUCT_REF_SEQ_DIF:
+    LOG_TRACE("processing struct_ref_seq_dif entry");
+    this->ParseStructRefSeqDif(columns);
+    break;
   default:
     throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
                        "Uncatched category '"+ header.GetCategory() +"' found.",
@@ -1379,6 +1415,103 @@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent)
   }
 }
 
+
+/// \brief store one row of the struct_ref category in struct_refs_
+void MMCifReader::ParseStructRef(const std::vector<StringRef>& columns)
+{
+  const String ref_id=columns[indices_[SR_ID]].str();
+  const String entity_id=columns[indices_[SR_ENTITY_ID]].str();
+  const String database=columns[indices_[SR_DB_NAME]].str();
+  const String code=columns[indices_[SR_DB_CODE]].str();
+  String accession;  // stays empty when the pdbx_db_accession index is -1
+  if (indices_[SR_DB_ACCESS]!=-1) {
+    accession=columns[indices_[SR_DB_ACCESS]].str();
+  }
+  struct_refs_.push_back(MMCifInfoStructRefPtr(
+      new MMCifInfoStructRef(ref_id, entity_id, database, code, accession)));
+}
+
+/// \brief parse a struct_ref_seq row and attach it to its struct_ref
+void MMCifReader::ParseStructRefSeq(const std::vector<StringRef>& columns)
+{
+  String aln_id=columns[indices_[SRS_ALIGN_ID]].str();
+  String sr_id=columns[indices_[SRS_STRUCT_REF_ID]].str();
+  // pdbx_strand_id is optional, chain_name stays empty when its index is -1
+  String chain_name;
+  if (indices_[SRS_PDBX_STRAND_ID]!=-1) {
+    chain_name=columns[indices_[SRS_PDBX_STRAND_ID]].str();
+  }
+  std::pair<bool,int> dbbeg=this->TryGetInt(columns[indices_[SRS_DB_ALIGN_BEG]],
+                                            "_struct_ref_seq.db_align_beg",
+                                            profile_.fault_tolerant);
+  std::pair<bool,int> dbend=this->TryGetInt(columns[indices_[SRS_DB_ALIGN_END]],
+                                            "_struct_ref_seq.db_align_end",
+                                            profile_.fault_tolerant);
+  std::pair<bool,int> entbeg=this->TryGetInt(columns[indices_[SRS_ENT_ALIGN_BEG]],
+                                             "_struct_ref_seq.seq_align_beg",
+                                             profile_.fault_tolerant);
+  std::pair<bool,int> entend=this->TryGetInt(columns[indices_[SRS_ENT_ALIGN_END]],
+                                             "_struct_ref_seq.seq_align_end",
+                                             profile_.fault_tolerant);
+  // TryGetInt flagged at least one range boundary as failed: skip this row
+  if (!(dbbeg.first && dbend.first && entbeg.first && entend.first)) {
+    return;
+  }
+  for (MMCifInfoStructRefs::iterator i=struct_refs_.begin(),
+       e=struct_refs_.end(); i!=e; ++i) {
+    if ((*i)->GetID()==sr_id) {
+      (*i)->AddAlignedSeq(aln_id, chain_name, entbeg.second, entend.second,
+                          dbbeg.second, dbend.second);
+      return;
+    }
+  }
+  // no struct_ref with id sr_id is stored: log if fault tolerant, else throw
+  if (profile_.fault_tolerant) {
+    LOG_ERROR("struct_ref_seq.ref_id points to inexistent struct_ref '"
+              << sr_id <<  "'");
+    return;
+  }
+  std::stringstream ss;
+  ss << "struct_ref_seq.ref_id points to inexistent struct_ref '";
+  ss << sr_id << "'";
+  throw IOException(ss.str());
+}
+
+/// \brief parse a struct_ref_seq_dif row and attach it to its struct_ref_seq
+void MMCifReader::ParseStructRefSeqDif(const std::vector<StringRef>& columns)
+{
+  String aln_id=columns[indices_[SRSD_ALIGN_ID]].str();
+  std::pair<bool,int> rnum=this->TryGetInt(columns[indices_[SRSD_RNUM]],
+                                           "_struct_ref_seq_dif.seq_num",
+                                           profile_.fault_tolerant);
+  // TryGetInt flagged seq_num as failed: skip this row
+  if (!rnum.first) {
+    return;
+  }
+  // details is optional and stays empty when its column index is -1
+  String details;
+  if (indices_[SRSD_DETAILS]!=-1) {
+    details=columns[indices_[SRSD_DETAILS]].str();
+  }
+  for (MMCifInfoStructRefs::iterator i=struct_refs_.begin(),
+       e=struct_refs_.end(); i!=e; ++i) {
+    if (MMCifInfoStructRefSeqPtr s=(*i)->GetAlignedSeq(aln_id)) {
+      s->AddDif(rnum.second, details);
+      return;
+    }
+  }
+  // no struct_ref holds an alignment with this id: log or throw
+  if (profile_.fault_tolerant) {
+    LOG_ERROR("struct_ref_seq_dif.align_id points to inexistent "
+              "struct_ref_seq '" << aln_id <<  "'");
+    return;
+  }
+  std::stringstream ss;
+  ss << "struct_ref_seq_dif.align_id points to inexistent struct_ref_seq '";
+  ss << aln_id << "'";
+  throw IOException(ss.str());
+}
+
 void MMCifReader::OnEndData()
 {
   mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT);
@@ -1427,6 +1560,7 @@ void MMCifReader::OnEndData()
   std::vector<MMCifInfoTransOpPtr> operation_list;
   std::map<String, String>::const_iterator buom_it;
   std::vector<MMCifInfoTransOpPtr> operations = info_.GetOperations();
+  info_.SetStructRefs(struct_refs_);
   std::vector<MMCifInfoTransOpPtr>::const_iterator buop_it;
   for (bua_it = bu_assemblies_.begin();
        bua_it != bu_assemblies_.end();
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
index af12c813a..2a8f549ec 100644
--- a/modules/io/src/mol/mmcif_reader.hh
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -229,6 +229,7 @@ protected:
   /// \param columns data row
   void ParseCitation(const std::vector<StringRef>& columns);
 
+	const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; }
   /// \brief convert the seqres data item to canonical form. 
   /// 
   /// The seqres sequence lists non-standard residues in paranthesis. For 
@@ -242,7 +243,15 @@ protected:
   ///
   /// \param columns data row
   void ParseCitationAuthor(const std::vector<StringRef>& columns);
-
+  
+  /// \brief parse a row in the struct_ref category
+  void ParseStructRef(const std::vector<StringRef>& columns);
+  
+  /// \brief parse row in the struct_ref_seq category
+  void ParseStructRefSeq(const std::vector<StringRef>& columns);
+
+  ///  \brief parse row in the struct_ref_seq_dif category
+  void ParseStructRefSeqDif(const std::vector<StringRef>& columns);
   /// \brief Fetch mmCIF exptl information
   ///
   /// \param columns data row
@@ -401,6 +410,32 @@ private:
     METHOD_DETAILS                ///< details about assembly computation
   } PdbxStructAssemblyItems;
 
+  /// \enum items of the struct_ref category
+  typedef enum {
+  	SR_ENTITY_ID,  ///< struct_ref.entity_id
+  	SR_ID,  ///< struct_ref.id, matched against struct_ref_seq.ref_id
+  	SR_DB_CODE,  ///< struct_ref.db_code
+  	SR_DB_NAME,  ///< struct_ref.db_name
+  	SR_DB_ACCESS  ///< struct_ref.pdbx_db_accession, index -1 if absent
+	} StructRefItems;
+	
+	/// \enum items of the struct_ref_seq category
+	typedef enum {
+		SRS_ALIGN_ID,  ///< struct_ref_seq.align_id
+		SRS_STRUCT_REF_ID,  ///< struct_ref_seq.ref_id, link to struct_ref.id
+		SRS_PDBX_STRAND_ID,  ///< struct_ref_seq.pdbx_strand_id, -1 if absent
+		SRS_DB_ALIGN_BEG,  ///< struct_ref_seq.db_align_beg
+		SRS_DB_ALIGN_END,  ///< struct_ref_seq.db_align_end
+		SRS_ENT_ALIGN_BEG,  ///< struct_ref_seq.seq_align_beg
+		SRS_ENT_ALIGN_END  ///< struct_ref_seq.seq_align_end
+	} StructRefSeqItems;
+
+	/// \enum items of the struct_ref_seq_dif category
+	typedef enum {
+		SRSD_ALIGN_ID,  ///< struct_ref_seq_dif.align_id
+		SRSD_RNUM,  ///< struct_ref_seq_dif.seq_num
+    SRSD_DETAILS  ///< struct_ref_seq_dif.details, index -1 if absent
+	} StructRefSeqDifItems;
   /// \enum items of the pdbx_struct_assembly_gen category
   typedef enum {
     ASSEMBLY_ID,                  ///< link to pdbx_struct_assembly.id
@@ -490,6 +525,9 @@ private:
     STRUCT_CONF,
     STRUCT_SHEET_RANGE,
     PDBX_DATABASE_PDB_OBS_SPR,
+    STRUCT_REF,
+    STRUCT_REF_SEQ,
+    STRUCT_REF_SEQ_DIF,
     DONT_KNOW
   } MMCifCategory;
 
@@ -550,6 +588,7 @@ private:
   std::map<String, String> bu_origin_map_; ///< pdbx_struct_assembly.details
   MMCifHSVector helix_list_; ///< for storing struct_conf sec.struct. data
   MMCifHSVector strand_list_; ///< for storing struct_conf sec.struct. data
+	MMCifInfoStructRefs struct_refs_;
 };
 
 }}
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
index e33f6ad6f..b2294ba90 100644
--- a/modules/io/tests/test_mmcif_reader.cc
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -614,6 +614,40 @@ BOOST_AUTO_TEST_CASE(mmcif_citation_author_tests)
   BOOST_MESSAGE("  done.");
 }
 
+BOOST_AUTO_TEST_CASE(mmcif_struct_ref)
+{ // parses struct_ref.cif and checks the struct_ref, _seq and _seq_dif info
+  mol::EntityHandle eh = mol::CreateEntity();
+  std::ifstream s("testfiles/mmcif/struct_ref.cif");
+  IOProfile profile;
+  MMCifReader mmcif_p(s, eh, profile);
+  mmcif_p.Parse();
+  MMCifInfoStructRefs refs=mmcif_p.GetInfo().GetStructRefs();
+  BOOST_CHECK_EQUAL(refs.size(), 1);
+  MMCifInfoStructRefPtr sr1=refs[0];
+  BOOST_CHECK_EQUAL(sr1->GetDBName(), "UNP");
+  BOOST_CHECK_EQUAL(sr1->GetDBID(), "BLA2_BACCE");
+  BOOST_CHECK_EQUAL(sr1->GetDBAccess(), "P04190");
+  BOOST_CHECK_EQUAL(sr1->GetID(), "1");
+  // GetAlignedSeqs walks a std::map keyed by align_id, so "1" precedes "13"
+  MMCifInfoStructRefSeqs seqs=sr1->GetAlignedSeqs();
+  BOOST_CHECK_EQUAL(seqs.size(), 2);
+  BOOST_CHECK_EQUAL(seqs[0]->GetID(), "1");
+  BOOST_CHECK_EQUAL(seqs[0]->GetChainName(), "A");
+  BOOST_CHECK_EQUAL(seqs[0]->GetSeqBegin(), 1);
+  BOOST_CHECK_EQUAL(seqs[0]->GetSeqEnd(), 19);
+  BOOST_CHECK_EQUAL(seqs[0]->GetDBBegin(), 31);
+  BOOST_CHECK_EQUAL(seqs[0]->GetDBEnd(), 49);
+  BOOST_CHECK_EQUAL(seqs[1]->GetID(), "13");
+  BOOST_CHECK_EQUAL(seqs[1]->GetChainName(), "B");
+  BOOST_CHECK_EQUAL(seqs[1]->GetSeqBegin(), 1);
+  BOOST_CHECK_EQUAL(seqs[1]->GetSeqEnd(), 19);
+  BOOST_CHECK_EQUAL(seqs[1]->GetDBBegin(), 31);
+  BOOST_CHECK_EQUAL(seqs[1]->GetDBEnd(), 49);
+  MMCifInfoStructRefSeqDifs diffs=seqs[0]->GetDifs(); // difs of align_id "1"
+  BOOST_CHECK_EQUAL(diffs.size(), 1);
+  BOOST_CHECK_EQUAL(diffs[0]->GetRNum(), 91);
+  BOOST_CHECK_EQUAL(diffs[0]->GetDetails(), "ENGINEERED MUTATION");
+}
+
 BOOST_AUTO_TEST_CASE(mmcif_refine_tests)
 {
   BOOST_MESSAGE("  Running mmcif_refine_tests...");
diff --git a/modules/io/tests/testfiles/mmcif/struct_ref.cif b/modules/io/tests/testfiles/mmcif/struct_ref.cif
new file mode 100644
index 000000000..1d87ec60b
--- /dev/null
+++ b/modules/io/tests/testfiles/mmcif/struct_ref.cif
@@ -0,0 +1,47 @@
+data_2bfl
+# taken from 2bfl.cif 
+_struct_ref.id                         1 
+_struct_ref.db_name                    UNP 
+_struct_ref.db_code                    BLA2_BACCE 
+_struct_ref.entity_id                  1 
+_struct_ref.pdbx_seq_one_letter_code   ? 
+_struct_ref.pdbx_align_begin           ? 
+_struct_ref.biol_id                    . 
+_struct_ref.pdbx_db_accession          P04190 
+# 
+loop_
+_struct_ref_seq.align_id 
+_struct_ref_seq.ref_id 
+_struct_ref_seq.pdbx_PDB_id_code 
+_struct_ref_seq.pdbx_strand_id 
+_struct_ref_seq.seq_align_beg 
+_struct_ref_seq.pdbx_seq_align_beg_ins_code 
+_struct_ref_seq.seq_align_end 
+_struct_ref_seq.pdbx_seq_align_end_ins_code 
+_struct_ref_seq.pdbx_db_accession 
+_struct_ref_seq.db_align_beg 
+_struct_ref_seq.pdbx_db_align_beg_ins_code 
+_struct_ref_seq.db_align_end 
+_struct_ref_seq.pdbx_db_align_end_ins_code 
+_struct_ref_seq.pdbx_auth_seq_align_beg 
+_struct_ref_seq.pdbx_auth_seq_align_end 
+1  1 2BFL A 1   ? 19  ? P04190 31  ? 49  ? 27  45  
+13 1 2BFL B 1   ? 19  ? P04190 31  ? 49  ? 27  45  
+# 
+loop_
+_struct_ref_seq_dif.align_id 
+_struct_ref_seq_dif.pdbx_pdb_id_code 
+_struct_ref_seq_dif.mon_id 
+_struct_ref_seq_dif.pdbx_pdb_strand_id 
+_struct_ref_seq_dif.seq_num 
+_struct_ref_seq_dif.pdbx_pdb_ins_code 
+_struct_ref_seq_dif.pdbx_seq_db_name 
+_struct_ref_seq_dif.pdbx_seq_db_accession_code 
+_struct_ref_seq_dif.db_mon_id 
+_struct_ref_seq_dif.pdbx_seq_db_seq_num 
+_struct_ref_seq_dif.details 
+_struct_ref_seq_dif.pdbx_auth_seq_num 
+_struct_ref_seq_dif.pdbx_ordinal 
+1  2BFL CYS A 91 ? UNP P04190 ARG 121 'ENGINEERED MUTATION' 121 1 
+13 2BFL CYS B 91 ? UNP P04190 ARG 121 'ENGINEERED MUTATION' 121 2 
+
-- 
GitLab