From aac17c6cf70a13e6aa53792cf162b8d7a4022ef4 Mon Sep 17 00:00:00 2001
From: Stefan Bienert <stefan.bienert@unibas.ch>
Date: Wed, 20 Jul 2011 12:12:48 +0200
Subject: [PATCH] First version of a MMCIF Parser, early stage (experimental)

---
 modules/io/pymod/CMakeLists.txt               |   1 +
 modules/io/pymod/__init__.py                  | 101 +++-
 modules/io/pymod/wrap_io.cc                   |   2 +
 modules/io/src/mol/CMakeLists.txt             |   2 +
 modules/io/src/mol/mmcif_reader.cc            | 550 ++++++++++++++++++
 modules/io/src/mol/mmcif_reader.hh            | 208 +++++++
 modules/io/src/mol/star_parser.cc             |  71 ++-
 modules/io/src/mol/star_parser.hh             |  34 +-
 modules/io/tests/CMakeLists.txt               |   1 +
 modules/io/tests/test_mmcif_reader.cc         | 233 ++++++++
 .../io/tests/testfiles/mmcif/atom_site.mmcif  |  49 ++
 11 files changed, 1248 insertions(+), 4 deletions(-)
 create mode 100644 modules/io/src/mol/mmcif_reader.cc
 create mode 100644 modules/io/src/mol/mmcif_reader.hh
 create mode 100644 modules/io/tests/test_mmcif_reader.cc
 create mode 100644 modules/io/tests/testfiles/mmcif/atom_site.mmcif

diff --git a/modules/io/pymod/CMakeLists.txt b/modules/io/pymod/CMakeLists.txt
index 902089f7f..023e000b8 100644
--- a/modules/io/pymod/CMakeLists.txt
+++ b/modules/io/pymod/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(OST_IO_PYMOD_SOURCES
   wrap_io.cc
   export_pdb_io.cc
+  export_mmcif_io.cc
 )
 
 if (ENABLE_IMG)
diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py
index 74047dec0..57a0464bb 100644
--- a/modules/io/pymod/__init__.py
+++ b/modules/io/pymod/__init__.py
@@ -88,7 +88,7 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None,
             profile='DEFAULT', remote=False, dialect=None,
             strict_hydrogens=None, seqres=False):
   """
-  Load PDB file from disk and returns one or more entities. Several options 
+  Load PDB file from disk and return one or more entities. Several options 
   allow to customize the exact behaviour of the PDB import. For more information 
   on these options, see :doc:`profile`.
   
@@ -264,6 +264,105 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM',
       raise ValueError("No DCD filename given")
   return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load)
 
+def LoadMMCIF(filename, restrict_chains="", no_hetatms=None,
+            fault_tolerant=None, load_multi=False, quack_mode=None,
+            join_spread_atom_records=None, calpha_only=None,
+            profile='DEFAULT', remote=False, strict_hydrogens=None,
+              seqres=False):
+  """
+  Load MMCIF file from disk and return one or more entities. Several options 
+  allow to customize the exact behaviour of the MMCIF import. For more
+  information on these options, see :doc:`profile`.
+  
+  Residues are flagged as ligand if they are mentioned in a HET record.
+
+  :param restrict_chains: If not an empty string, only chains listed in the
+     string will be imported.
+
+  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
+     the value of :attr:`IOProfile.fault_tolerant`.
+
+  :param no_hetatms: If set to True, HETATM records will be ignored. Overrides 
+      the value of :attr:`IOProfile.no_hetatms`
+
+  :param load_multi: If set to True, a list of entities will be returned instead
+      of only the first. This is useful when dealing with multi-PDB files.
+
+  :param join_spread_atom_records: If set, overrides the value of 
+      :attr:`IOProfile.join_spread_atom_records`.
+  
+  :param remote: If set to true, the method tries to load the pdb from the 
+     remote pdb repository www.pdb.org. The filename is then interpreted as the 
+     pdb id.
+     
+  :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is 
+      True.
+
+  :param seqres: Whether to read SEQRES records. If set to true, the loaded 
+    entity and seqres entry will be returned as a tuple.
+  
+  :param strict_hydrogens: If set, overrides the value of 
+     :attr:`IOProfile.strict_hydrogens`.
+
+  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or 
+      inexistent file
+  """
+  def _override(val1, val2):
+    if val2!=None:
+      return val2
+    else:
+      return val1
+  if isinstance(profile, str):
+    prof = profiles[profile].Copy()
+  else:
+    prof = profile.Copy()
+
+  prof.calpha_only=_override(prof.calpha_only, calpha_only)
+  prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
+  prof.quack_mode=_override(prof.quack_mode, quack_mode)
+  prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
+  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
+  prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
+                                          join_spread_atom_records)
+
+  if remote:
+    output_dir = tempfile.gettempdir()
+    if __GetModelFromPDB(filename, output_dir):
+      filename = os.path.join(output_dir, 'pdb%s.ent.gz' % filename)
+    else:
+      raise IOError('Can not load PDB %s from www.pdb.org'%filename) 
+  
+  conop_inst = conop.Conopology.Instance()
+  builder = conop_inst.GetBuilder("DEFAULT")
+
+  builder.strict_hydrogens = prof.strict_hydrogens
+
+  try:
+    #if load_multi:
+    #  ent_list=[]
+    #  while reader.HasNext():
+    #    ent=mol.CreateEntity()
+    #    reader.Import(ent, restrict_chains)
+    #    conop_inst.ConnectAll(builder, ent, 0)
+    #    ent_list.append(ent)
+    #  if len(ent_list)==0:
+    #    raise IOError("File doesn't contain any entities")
+    #  return ent_list
+    #else:
+    ent = mol.CreateEntity()
+    reader = MMCifParser(filename, ent, prof)
+    #reader.read_seqres = seqres
+    #if reader.HasNext():
+    reader.Parse()
+    conop_inst.ConnectAll(builder, ent, 0)
+    #else:
+    #  raise IOError("File doesn't contain any entities")
+    #if seqres:
+    #  return ent, reader.seqres
+    return ent
+  except:
+    raise
+
 ## \example fft_li.py
 #
 # This scripts loads one or more images and shows their Fourier Transforms on 
diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc
index af5087cd2..1d9e3b936 100644
--- a/modules/io/pymod/wrap_io.cc
+++ b/modules/io/pymod/wrap_io.cc
@@ -72,6 +72,7 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(save_charmm_trj_ov,
 }
 
 void export_pdb_io();
+void export_mmcif_io();
 #if OST_IMG_ENABLED
 void export_map_io();
 #endif
@@ -117,6 +118,7 @@ BOOST_PYTHON_MODULE(_ost_io)
   def("LoadMAE", &LoadMAE);
 
   export_pdb_io();
+  export_mmcif_io();
 #if OST_IMG_ENABLED  
   export_map_io();
 #endif
diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt
index 1201eb58f..db328bb9c 100644
--- a/modules/io/src/mol/CMakeLists.txt
+++ b/modules/io/src/mol/CMakeLists.txt
@@ -15,12 +15,14 @@ chemdict_parser.cc
 io_profile.cc
 dcd_io.cc
 star_parser.cc
+mmcif_reader.cc
 PARENT_SCOPE
 )
 
 set(OST_IO_MOL_HEADERS
 chemdict_parser.hh
 star_parser.hh
+mmcif_reader.hh
 io_profile.hh
 dcd_io.hh
 entity_io_crd_handler.hh
diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc
new file mode 100644
index 000000000..535aedbfa
--- /dev/null
+++ b/modules/io/src/mol/mmcif_reader.cc
@@ -0,0 +1,550 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2011 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+#include <ctype.h>
+
+//#include <boost/iostreams/filter/gzip.hpp>
+//#include <boost/filesystem/convenience.hpp>
+//#include <boost/algorithm/string.hpp>
+//#include <boost/algorithm/string/trim.hpp>
+
+//#include <boost/format.hpp>
+//#include <boost/lexical_cast.hpp>
+
+#include <ost/profile.hh>
+#include <ost/log.hh>
+#include <ost/mol/xcs_editor.hh>
+#include <ost/io/mol/mmcif_reader.hh>
+
+namespace ost { namespace io {
+
+MMCifParser::MMCifParser(std::istream& stream, mol::EntityHandle& ent_handle,
+                         const IOProfile& profile):
+  StarParser(stream, true), profile_(profile), ent_handle_(ent_handle)
+{
+  this->Init();
+}
+
+MMCifParser::MMCifParser(const String& filename, mol::EntityHandle& ent_handle,
+                         const IOProfile& profile):
+  StarParser(filename, true), profile_(profile), ent_handle_(ent_handle)
+{
+  this->Init();
+}
+
+void MMCifParser::Init()
+{
+  warned_name_mismatch_ = false;
+  category_             = DONT_KNOW;
+  chain_count_          = 0;
+  record_type_          = "";
+  atom_count_           = 0;
+  residue_count_        = 0;
+  auth_chain_id_        = false;
+  go_on_                = true;
+  //memset(indices_, -1, MAX_ITEMS_IN_ROW * sizeof(int));
+  restrict_chains_      = "";
+  //curr_chain_           = mol::ChainHandle();
+  //curr_residue_         = mol::ResidueHandle();
+}
+
+void MMCifParser::ClearState()
+{
+  curr_chain_           = mol::ChainHandle();
+  curr_residue_         = mol::ResidueHandle();
+  chain_count_          = 0;
+  residue_count_        = 0;
+  atom_count_           = 0;
+  category_             = DONT_KNOW;
+  record_type_          = "";
+  warned_name_mismatch_ = false;
+}
+
+void MMCifParser::SetRestrictChains(const String& restrict_chains)
+{
+  restrict_chains_ = restrict_chains;
+}
+
+bool MMCifParser::IsValidPDBIdent(const StringRef& pdbid)
+{
+  if (pdbid.length() == PDBID_LEN && isdigit(pdbid[0])) {
+    return true;
+  }
+  return false;
+}
+
+bool MMCifParser::OnBeginData(const StringRef& data_name) 
+{
+  LOG_DEBUG("MCIFFReader: " << profile_);
+  Profile profile_import("MMCifReader::OnBeginData");
+
+  // check for PDB id
+  if (!this->IsValidPDBIdent(data_name)) {
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                         "No valid PDB id found for data block, read instead \'"
+                                             + data_name.str() + "\'",
+                                             this->GetCurrentLinenum()));
+  }
+
+  this->ClearState();
+
+  go_on_ = true;
+
+  return true;
+}
+
+bool MMCifParser::OnBeginLoop(const StarLoopDesc& header)
+{
+  // set whole array to -1
+  memset(indices_, -1, MAX_ITEMS_IN_ROW * sizeof(int));
+  // walk through possible categories
+  if (header.GetCategory() == "atom_site") {
+    category_ = ATOM_SITE;
+    // mandatory items
+    this->TryStoreIdx(AUTH_ASYM_ID,    "auth_asym_id",    header);
+    this->TryStoreIdx(ID,              "id",              header);
+    this->TryStoreIdx(LABEL_ALT_ID,    "label_alt_id",    header);
+    this->TryStoreIdx(LABEL_ASYM_ID,   "label_asym_id",   header);
+    this->TryStoreIdx(LABEL_ATOM_ID,   "label_atom_id",   header);
+    this->TryStoreIdx(LABEL_COMP_ID,   "label_comp_id",   header);
+    this->TryStoreIdx(LABEL_ENTITY_ID, "label_entity_id", header);
+    this->TryStoreIdx(LABEL_SEQ_ID,    "label_seq_id",    header);
+    this->TryStoreIdx(TYPE_SYMBOL,     "type_symbol",     header);
+    // assumed mandatory
+    this->TryStoreIdx(CARTN_X, "Cartn_x", header);
+    this->TryStoreIdx(CARTN_Y, "Cartn_y", header);
+    this->TryStoreIdx(CARTN_Z, "Cartn_z", header);
+    // optional
+    indices_[PDBX_PDB_INS_CODE] = header.GetIndex("pdbx_PDB_ins_code");
+    indices_[OCCUPANCY] = header.GetIndex("occupancy");
+    indices_[B_ISO_OR_EQUIV] = header.GetIndex("B_iso_or_equiv");
+    indices_[GROUP_PDB] = header.GetIndex("group_PDB");
+    return true;
+  } /*else if (header.GetCategory()=="entity_poly") {
+  } else if (header.GetCategory()=="pdbx_poly_seq_scheme") {
+  } else if (header.GetCategory()=="pdbx_struct_assembly") {
+  } else if (header.GetCategory()=="struct_conf") {
+  }*/
+  return false;
+}
+
+bool MMCifParser::EnsureEnoughColumns(const std::vector<StringRef>& columns,
+                                      size_t size)
+{
+  if (columns.size() < size) {
+    if (profile_.fault_tolerant) {
+      return false;
+    }
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                             "Not enough data fields",
+                                             this->GetCurrentLinenum()));
+  }  
+  return true;
+}
+
+mol::ResNum to_res_num(int num, char ins_code)
+{
+  return mol::ResNum(num, ins_code==' ' ? '\0' : ins_code);
+}
+
+bool MMCifParser::ParseAtomIdent(const std::vector<StringRef>& columns,
+                                 String& chain_name,
+                                 StringRef& res_name,
+                                 mol::ResNum& resnum,
+                                 StringRef& atom_name,
+                                 char& alt_loc)
+{
+  // is checking really neccessary? just by using OnBeginData, enough columns
+  // should be available!
+  if (!this->EnsureEnoughColumns(columns, 12)) {
+    return false;
+  }
+  // ATOM name
+  atom_name = columns[indices_[LABEL_ATOM_ID]];
+  if (profile_.calpha_only) { // unit test profile
+    if (atom_name != StringRef("CA", 2)) {
+      return false;
+    }
+  }
+  // CHAIN ID
+  if (auth_chain_id_) { // unit test different IDs
+    chain_name = columns[indices_[AUTH_ASYM_ID]].str();
+  } else {
+    chain_name = columns[indices_[LABEL_ASYM_ID]].str();
+    if (restrict_chains_.size() > 0 && // unit test
+        restrict_chains_.find(chain_name) == String::npos) { // unit test
+      return false;
+    }
+  }
+ 
+  std::pair<bool, int> a_num = this->TryGetInt(columns[indices_[ID]],
+                                               "_atom_site.id",
+                                          profile_.fault_tolerant); // unit test
+
+  alt_loc = columns[indices_[LABEL_ALT_ID]][0];
+  res_name = columns[indices_[LABEL_COMP_ID]];
+  std::pair<bool, int> res_num =this->TryGetInt(columns[indices_[LABEL_SEQ_ID]],
+                                               "_atom_site.label_seq_id",
+                                          profile_.fault_tolerant); // unit test
+  if (!res_num.first) { // unit test
+    if (profile_.fault_tolerant) {
+      return false;
+    }
+  }
+
+  if (indices_[PDBX_PDB_INS_CODE] != -1) { // unit test
+    resnum=to_res_num(res_num.second, columns[indices_[PDBX_PDB_INS_CODE]][0]);
+  }
+  else {
+    resnum=to_res_num(res_num.second, ' ');
+  }
+
+  return true;
+}
+
+void MMCifParser::ParseAndAddAtom(const std::vector<StringRef>& columns)
+{
+  if (!this->EnsureEnoughColumns(columns, 12)) {
+    return;
+  }
+  mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT); // potbl
+  char alt_loc=0;
+  String chain_name;
+  StringRef res_name, atom_name;
+  mol::ResNum res_num(0);
+  if (!this->ParseAtomIdent(columns,
+                            chain_name,
+                            res_name,
+                            res_num,
+                            atom_name,
+                            alt_loc)) {
+    return;                            
+  }
+  Real occ, temp;
+  geom::Vec3 apos;
+  
+  for (int i = CARTN_X; i <= CARTN_Z; ++i) {
+    std::pair<bool, float> result = this->TryGetFloat(columns[indices_[i]],
+                                                      "atom_site.cartn_[xyz]",
+                                                      profile_.fault_tolerant);
+    if (!result.first) { // unit test
+      if (profile_.fault_tolerant) { // unit test
+        return;
+      }
+    }
+    apos[i - CARTN_X] = result.second;
+  }
+
+  if (indices_[OCCUPANCY] != -1) { // unit test
+    occ = this->TryGetReal(columns[indices_[OCCUPANCY]], "atom_site.occupancy");
+  } else {
+    occ = 1.00f;
+  }
+  if (indices_[B_ISO_OR_EQUIV] != -1) { // unit test
+    temp = this->TryGetReal(columns[indices_[B_ISO_OR_EQUIV]],
+                            "atom_site.B_iso_or_equiv");
+  } else {
+    temp = 0;
+  }
+
+  // determine element
+  String s_ele(columns[indices_[TYPE_SYMBOL]].str());
+
+  String aname(atom_name.str());  
+  // some postprocessing
+  LOG_TRACE( "s_chain: [" << chain_name << "]" );
+
+  // determine chain and residue update
+  bool update_chain=false;
+  bool update_residue=false;
+  if(!curr_chain_) { // unit test
+      update_chain=true;
+      update_residue=true;
+  } else if(curr_chain_.GetName() != chain_name) { // unit test
+    update_chain=true;
+    update_residue=true;
+  }
+
+  if(!curr_residue_) { // unit test
+    update_residue=true;
+  } else if(curr_residue_.GetNumber() != res_num) { // unit test
+    update_residue=true;
+  }
+
+  if(update_chain) { // unit test
+    curr_chain_ = ent_handle_.FindChain(chain_name);
+    if(!curr_chain_.IsValid()) { // unit test
+      LOG_DEBUG("new chain " << chain_name);
+      curr_chain_=editor.InsertChain(chain_name);
+      ++chain_count_;
+    }
+    assert(curr_chain_.IsValid());
+  }
+
+  if(update_residue) { // unit test
+    curr_residue_=mol::ResidueHandle();
+    if (profile_.join_spread_atom_records) { // unit test
+      curr_residue_=curr_chain_.FindResidue(res_num);
+    }
+    if (!curr_residue_.IsValid()) { // unit test
+      LOG_DEBUG("new residue " << res_name << " " << res_num);
+      curr_residue_ =editor.AppendResidue(curr_chain_, res_name.str(), res_num);
+      warned_name_mismatch_=false;
+      ++residue_count_; 
+    }
+    assert(curr_residue_.IsValid());
+  }
+
+  // finally add atom
+  LOG_DEBUG("adding atom " << aname << " (" << s_ele << ") @" << apos);
+  mol::AtomHandle ah;
+  if (curr_residue_.GetName()!=res_name.str()) { // unit test
+    if (!profile_.fault_tolerant && alt_loc=='.') { // unit test
+      std::stringstream ss;
+      ss << "Residue with number " << res_num << " has more than one name.";
+      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                               ss.str(),
+                                               this->GetCurrentLinenum()));
+    }
+    if (!warned_name_mismatch_) { // unit test
+      if (alt_loc=='.') { // unit test
+        LOG_WARNING("Residue with number " << res_num << " has more than one "
+                    "name. Ignoring atoms for everything but the first");
+      } else {
+        LOG_WARNING("Residue with number " << res_num 
+                    << " contains a microheterogeneity. Everything but atoms "
+                    "for the residue '" << curr_residue_.GetName() 
+                    << "' will be ignored");
+      }
+    }
+    warned_name_mismatch_=true;
+    return;
+  }
+  if (alt_loc!='.') { // unit test
+    // Check if there is already a atom with the same name.
+    mol::AtomHandle me=curr_residue_.FindAtom(aname);
+    if (me.IsValid()) { // unit test
+      try {
+        editor.AddAltAtomPos(String(1, alt_loc), me, apos);
+      } catch (Error) {
+        LOG_INFO("Ignoring atom alt location since there is already an atom "
+                 "with name " << aname << ", but without an alt loc");
+        return;
+      }
+      return;
+    } else {
+      ah = editor.InsertAltAtom(curr_residue_, aname,
+                                String(1, alt_loc), apos, s_ele);
+      ++atom_count_;
+      }
+  } else {
+    mol::AtomHandle atom=curr_residue_.FindAtom(aname);
+    if (atom.IsValid() && !profile_.quack_mode) { // unit test
+      if (profile_.fault_tolerant) { // unit test
+        LOG_WARNING("duplicate atom '" << aname << "' in residue " 
+                    << curr_residue_);
+        return;
+      }
+      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                               "Duplicate atom '"+aname+
+                                               "' in residue "+
+                                               curr_residue_.GetQualifiedName(),
+                                               this->GetCurrentLinenum()));
+    }
+    ah = editor.InsertAtom(curr_residue_, aname, apos, s_ele);
+    ++atom_count_;
+  }
+  if (indices_[B_ISO_OR_EQUIV] != -1) {// unit test
+    ah.SetBFactor(temp);
+  }
+  if (indices_[OCCUPANCY] != -1) {// unit test
+    ah.SetOccupancy(occ);
+  }
+
+  // record type
+  if (indices_[GROUP_PDB] != -1) {
+    record_type_ = columns[indices_[GROUP_PDB]].str();
+  } else {
+    record_type_ = "ATOM";
+  }
+  ah.SetHetAtom(record_type_[0]=='H');
+}
+
+void MMCifParser::OnDataRow(const StarLoopDesc& header, 
+                            const std::vector<StringRef>& columns)
+{
+  switch(category_) {
+  case ATOM_SITE:
+    LOG_TRACE("processing atom_site entry");
+    this->ParseAndAddAtom(columns);
+    break;
+  default:
+    return;
+  }
+}
+
+    /*
+void PDBReader::Import(mol::EntityHandle& ent,
+                       const String& restrict_chains)
+{
+  do {
+    switch(curr_line[0]) {
+      case 'A':
+      case 'a':
+        if (IEquals(curr_line.substr(0, 6), StringRef("ANISOU", 6))) {
+          if (!charmm_style_) {
+            LOG_TRACE("processing ANISOU entry");
+            this->ParseAnisou(curr_line, line_num_, ent);            
+          }
+        }
+        break;
+      case 'C':
+      case 'c':
+        if (curr_line.size()<20) {
+          LOG_TRACE("skipping entry");
+          continue;
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) {
+          LOG_TRACE("processing COMPND entry");
+          this->ParseCompndEntry(curr_line, line_num_);
+        }
+        break;
+      case 'E':
+      case 'e':
+        if (curr_line.size()<3) {
+          continue;
+        }
+        if (IEquals(curr_line.rtrim(), StringRef("END", 3))) {
+          hard_end_=true;
+          go_on=false;
+          break;
+        }
+        if (curr_line.size()<6) {
+          continue;
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("ENDMDL", 6))) {
+          go_on=false;
+          num_model_records_=0;
+          break;
+        }
+      case 'H':
+      case 'h':
+        if (curr_line.size()<6) {
+          continue;
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("HETATM", 6))) {
+          if (profile_.no_hetatms)
+            continue;
+          LOG_TRACE("processing HETATM entry");
+          this->ParseAndAddAtom(curr_line, line_num_, ent, 
+                                StringRef("HETATM", 6));
+        } else if (IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6))) {
+          if (!charmm_style_) {
+            this->ParseHelixEntry(curr_line);            
+          }
+        } else if (IEquals(curr_line.substr(0, 6), StringRef("HET   ", 6))) {
+          // remember het entry to mark the residues as ligand during ATOM import
+          char chain=curr_line[12];
+          std::pair<bool, int> num=curr_line.substr(13, 4).ltrim().to_int();
+          if (!num.first) {
+            if (profile_.fault_tolerant) {
+              LOG_WARNING("Invalid HET entry on line " << line_num_);
+              continue;
+            } else {
+              String msg=str(format("Invalid HET entry on line %d")%line_num_);
+              throw IOException(msg);
+            }
+          }
+          hets_.push_back(HetEntry(chain, to_res_num(num.second,
+                                   curr_line[17])));
+        }
+        break;
+      case 'M':
+      case 'm':
+        if (curr_line.size()<6) {
+          continue;
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6))) {
+          ++num_model_records_;          
+          if (num_model_records_<2) {
+            continue;
+          }
+          if (profile_.fault_tolerant) {
+            go_on=false;
+            num_model_records_=1;
+            break;
+          }
+          String msg=str(format("MODEL record without matching ENDMDL on line %d")%line_num_);
+          throw IOException(msg);
+        }
+        break;
+      case 'S':
+      case 's':
+        if (curr_line.size()<6) {
+          continue;
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6))) {
+          if (!charmm_style_) {
+            this->ParseStrandEntry(curr_line);
+          }
+        }
+        if (IEquals(curr_line.substr(0, 6), StringRef("SEQRES", 6))) {
+          if (read_seqres_) {
+            this->ParseSeqRes(curr_line, line_num_);
+          }
+        }
+        break;
+      default:
+        break;
+    }
+  }  while (std::getline(in_, curr_line_) && ++line_num_ && go_on);
+  LOG_INFO("imported "
+               << chain_count_ << " chains, "
+               << residue_count_ << " residues, "
+               << atom_count_ << " atoms; with "
+               << helix_list_.size() << " helices and "
+               << strand_list_.size() << " strands");
+  this->AssignSecStructure(ent);
+  this->AssignMolIds(ent);
+  for (HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) {
+    mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num);
+    if (res.IsValid()) {
+      res.SetIsLigand(true);
+    }      
+  }
+}
+     */
+
+  
+  /*
+  virtual void OnEndLoop() { }
+
+  virtual void OnDataItem(const StarDataItem& item) { }
+
+  };*/
+
+void MMCifParser::OnEndData()
+{
+  LOG_INFO("imported "
+           << chain_count_ << " chains, "
+           << residue_count_ << " residues, "
+           << atom_count_ << " atoms;");
+}
+
+}}
diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh
new file mode 100644
index 000000000..20f7bacd9
--- /dev/null
+++ b/modules/io/src/mol/mmcif_reader.hh
@@ -0,0 +1,208 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2011 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+#ifndef OST_MMCIF_PARSER_HH
+#define OST_MMCIF_PARSER_HH
+
+//#include <boost/iostreams/filtering_stream.hpp>
+//#include <boost/filesystem/fstream.hpp>
+
+#include <ost/mol/residue_handle.hh>
+#include <ost/io/mol/io_profile.hh>
+#include <ost/io/io_exception.hh>
+#include <ost/io/mol/star_parser.hh>
+
+namespace ost { namespace io {
+
+/// \enum categories of the mmcif format
+typedef enum {
+  ATOM_SITE,
+  DONT_KNOW
+} MMCifCategory;
+
+
+/// \brief reader for the mmcif file format
+/// 
+/// \section mmcif_format mmcif format description/ coverage
+/// 
+/// mmcif is an instance of the \link StarParser STAR format\endlink to store
+/// entries of the PDB. The following data categories should be covered by this
+/// parser:
+/// 
+/// \li atom_site
+class DLLEXPORT_OST_IO MMCifParser : public StarParser  {
+public:
+  /// \brief create a MMCifParser
+  ///
+  /// \param stream input stream
+  MMCifParser(std::istream& stream, mol::EntityHandle& ent_handle,
+              const IOProfile& profile);
+
+  /// \brief create a MMCifParser
+  ///
+  /// \param filename input file
+  MMCifParser(const String& filename, mol::EntityHandle& ent_handle,
+              const IOProfile& profile);
+
+  /// \brief Initialise the parser.
+  ///
+  /// \param loc Location of the file
+  void Init();
+
+  /// \brief Set up a fresh instance
+  void ClearState();
+
+  /// \brief Set names of restricted chains for the parser.
+  ///
+  /// \param restrict_chains chain name
+  void SetRestrictChains(const String& restrict_chains);
+
+  const String& GetRestrictChains() const
+  {
+    return restrict_chains_;
+  }
+
+  /// \brief check mmcif input to be read. Substitutional function for
+  /// \link StarParser StarParser\endlink.
+  ///
+  /// \param data_name value of the data_ tag
+  ///
+  /// \return true, if the blockcode (PDB id) is valid, false otherwise
+  virtual bool OnBeginData(const StringRef& data_name);
+
+  /// \brief check if a current loop is to be parsed
+  ///
+  /// \param header categories of the upcoming loop block
+  ///
+  /// \return bool
+  virtual bool OnBeginLoop(const StarLoopDesc& header);
+
+  /// \brief read a row of data
+  ///
+  /// \param header categories and items
+  /// \param columns data
+  virtual void OnDataRow(const StarLoopDesc& header, 
+                         const std::vector<StringRef>& columns);
+
+  /// \brief Finalise parsing.
+  virtual void OnEndData();
+
+ protected:
+  /// \brief Store an item index from loop header in preparation for reading a 
+  ///        row. Throws an exception if the item does not exist.
+  ///
+  /// \param mapping position the item index is stored at
+  /// \param item exact item name to fetch
+  /// \param header loop header to pull index from
+  void TryStoreIdx(const int mapping,
+                   const String& item,
+                   const StarLoopDesc& header)
+  {
+    indices_[mapping] = header.GetIndex(item);
+    
+    if (indices_[mapping] == -1) {
+      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                               "No item '" + item +
+                                               "' found in '" +
+                                               header.GetCategory()+
+                                               "' header",
+                                               this->GetCurrentLinenum()));
+     }
+  }
+
+  /// \brief Check a PDB id to be of length 4 and start with a digit
+  ///
+  /// \param pdbid putative PDB id
+  ///
+  /// \return true for a valid id, false otherwise
+  bool IsValidPDBIdent(const StringRef& pdbid);
+
+  /// \brief Check no. of columns available.
+  ///
+  /// \param columns fields
+  /// \param size required no. of columns
+  ///
+  /// \return true if there are enough columns, false otherwise
+  bool EnsureEnoughColumns(const std::vector<StringRef>& columns, size_t size);
+
+  /// \brief ...
+  ///
+  /// \param columns data row
+  /// \param atom_name corresponds to label_atom_id
+  bool ParseAtomIdent(const std::vector<StringRef>& columns,
+                      String& chain_name,
+                      StringRef& res_name,
+                      mol::ResNum& resnum,
+                      StringRef& atom_name,
+                      char& alt_loc);
+
+  /// \brief Fetch atom information and store it.
+  ///
+  /// \param columns data row
+  void ParseAndAddAtom(const std::vector<StringRef>& columns);
+
+private:
+  /// \enum magic numbers of this class
+  typedef enum {
+    PDBID_LEN=4,        ///< length of a PDB id
+    MAX_ITEMS_IN_ROW=16 ///< count for possible items in a loop row
+  } MMCifMagicNos;
+
+  /// \enum items of the atom_site category
+  typedef enum {
+    AUTH_ASYM_ID,      ///< chain name by author as in PDB
+    ID,                ///< atom serial id
+    LABEL_ALT_ID,      ///< AltLoc
+    LABEL_ASYM_ID,     ///< chain name by PDB
+    LABEL_ATOM_ID,
+    LABEL_COMP_ID,
+    LABEL_ENTITY_ID,
+    LABEL_SEQ_ID,      ///< residue no.
+    TYPE_SYMBOL,       ///< chemical element
+    CARTN_X,           ///< Coordinates ||IMPORTANT: This 3 entries have to stay
+    CARTN_Y,           ///< Coordinates ||together for the parser to work!
+    CARTN_Z,           ///< Coordinates ||
+    PDBX_PDB_INS_CODE,
+    OCCUPANCY,
+    B_ISO_OR_EQUIV,
+    GROUP_PDB          ///< record name
+  } AtomSiteItems;
+
+  // members
+  MMCifCategory category_;
+  int           indices_[MAX_ITEMS_IN_ROW]; ///< map items to values in loops
+  const IOProfile& profile_;
+  mol::EntityHandle& ent_handle_;
+  String restrict_chains_;
+  bool auth_chain_id_;       ///< use chain IDs given by authors rather than pdb
+  mol::ChainHandle curr_chain_;
+  mol::ResidueHandle curr_residue_;
+  int chain_count_;
+  int residue_count_;
+  int atom_count_;
+  bool warned_name_mismatch_;
+  String record_type_;
+  bool go_on_; ///< flow control within the parser hooks
+  //from pdbdreader
+  //entity als member, fill in ondatarow
+  //import function
+};
+
+}}
+
+#endif
diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc
index 33e235d09..d0440904e 100644
--- a/modules/io/src/mol/star_parser.cc
+++ b/modules/io/src/mol/star_parser.cc
@@ -20,28 +20,40 @@
 /*
   Author: Marco Biasini
  */
+#include <boost/iostreams/filter/gzip.hpp>
+
 #include <cassert>
 #include <sstream>
+#include <ost/log.hh>
 #include <ost/io/io_exception.hh>
 #include <ost/io/mol/star_parser.hh>
 
 namespace ost { namespace io {
 
 StarParser::StarParser(std::istream& stream, bool items_as_row):
-  stream_(stream), filename_("<stream>"), line_num_(0),
+  filename_("<stream>"), line_num_(0),
   has_current_line_(false), current_line_(),
   items_row_header_(), items_row_columns_(),
   items_row_values_()
 {
   items_as_row_ = items_as_row;
+
+  stream_.push(stream);
 }
 
 StarParser::StarParser(const String& filename, bool items_as_row):
-  fstream_(filename.c_str()), stream_(fstream_), filename_(filename),
+  fstream_(filename.c_str()), filename_(filename),
   line_num_(0), has_current_line_(false), current_line_(),
   items_row_header_(), items_row_columns_(), items_row_values_()
 {
   items_as_row_ = items_as_row;
+
+  if (filename.length() >= 3 &&
+      filename.substr(filename.length() - 3) == ".gz") {
+    stream_.push(boost::iostreams::gzip_decompressor());
+  }
+
+  stream_.push(fstream_);
 }
 
 String StarParser::FormatDiagnostic(StarDiagType type, const String& message,
@@ -66,6 +78,18 @@ String StarParser::FormatDiagnostic(StarDiagType type, const String& message,
   return ss.str();
 }
 
+Real StarParser::TryGetReal(const StringRef& data, const String& name) const
+{
+  std::pair<bool, Real> value = data.to_float();
+  if (!value.first) {
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                         "Expecting real number for " +
+                                               name + ", found '" + data.str() +
+                                             "' instead.", line_num_));
+  }
+  return value.second;
+}
+
 float StarParser::TryGetFloat(const StringRef& data, const String& name) const
 {
   std::pair<bool, float> value = data.to_float();
@@ -78,6 +102,28 @@ float StarParser::TryGetFloat(const StringRef& data, const String& name) const
   return value.second;
 }
 
+std::pair<bool, float> StarParser::TryGetFloat(const StringRef& data,
+                                               const String& name,
+                                               bool may_fail) const
+{
+  std::pair<bool, float> value = data.to_float();
+  if (!value.first) {
+    if (!may_fail) {
+    throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                         "Expecting floating point value for " +
+                                               name + ", found '" + data.str() +
+                                             "' instead.", line_num_));
+    }
+    else {
+      LOG_WARNING(this->FormatDiagnostic(STAR_DIAG_WARNING,
+                                         "Expecting floating point value for " +
+                                         name + ", found '" + data.str() +
+                                         "' instead.", line_num_));
+    }
+  }
+  return value;
+}
+
 int StarParser::TryGetInt(const StringRef& data, const String& name) const
 {
   std::pair<bool, int> value = data.to_int();
@@ -90,6 +136,27 @@ int StarParser::TryGetInt(const StringRef& data, const String& name) const
   return value.second;
 }
 
+std::pair<bool, int> StarParser::TryGetInt(const StringRef& data,
+                                           const String& name,
+                                           bool may_fail) const
+{
+  std::pair<bool, int> value = data.to_int();
+  if (!value.first) {
+    if (!may_fail) {
+      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
+                                               "Expecting integer value for " +
+                                               name + ", found '" + data.str() +
+                                               "' instead.", line_num_));
+    } else {
+      LOG_WARNING(this->FormatDiagnostic(STAR_DIAG_WARNING,
+                                         "Expecting integer value for " +
+                                         name + ", found '" + data.str() +
+                                         "' instead.", line_num_));
+    }
+  }
+  return value;
+}
+
 bool StarParser::TryGetBool(const StringRef& data, const String& name) const
 {
   if (data.length() == 1) {
diff --git a/modules/io/src/mol/star_parser.hh b/modules/io/src/mol/star_parser.hh
index 96da40e73..75feb7adf 100644
--- a/modules/io/src/mol/star_parser.hh
+++ b/modules/io/src/mol/star_parser.hh
@@ -23,6 +23,8 @@
 /*
   Author: Marco Biasini
  */
+#include <boost/iostreams/filtering_stream.hpp>
+
 #include <iostream>
 #include <fstream>
 #include <vector>
@@ -148,6 +150,14 @@ public:
   ///     OnBeginData() returned true.
   virtual void OnEndData() { }
 
+  /// \brief try to convert a value to Real, on failure raise an exception.
+  ///
+  /// \param data value to be converted
+  /// \param name to be included in the message
+  ///
+  /// \return converted value
+  Real TryGetReal(const StringRef& data, const String& name) const;
+
   /// \brief try to convert a value to float, on failure raise an exception.
   ///
   /// \param data value to be converted
@@ -156,6 +166,17 @@ public:
   /// \return converted value
   float TryGetFloat(const StringRef& data, const String& name) const;
 
+  /// \brief try to convert a value to float, on failure raise an exception.
+  ///
+  /// \param data value to be converted
+  /// \param name to be included in the message
+  /// \param may_fail decides if an exception is raised (false) or not (true)
+  ///
+  /// \return converted value
+  std::pair<bool, float> TryGetFloat(const StringRef& data,
+                                     const String& name,
+                                     bool may_fail) const;
+
   /// \brief try to convert a value to integer, on failure raise an exception.
   ///
   /// \param data value to be converted
@@ -164,6 +185,17 @@ public:
   /// \return converted value
   int TryGetInt(const StringRef& data, const String& name) const;
 
+  /// \brief try to convert a value to integer, exception can be turned off.
+  ///
+  /// \param data value to be converted
+  /// \param name to be included in the message
+  /// \param may_fail decides if an exception is raised (false) or not (true)
+  ///
+  /// \return pair with value and indicator if conversion worked
+  std::pair<bool, int> TryGetInt(const StringRef& data,
+                                 const String& name,
+                                 bool may_fail) const;
+
   /// \brief try to convert a value to bool, on failure raise an exception.
   ///
   /// \param data value to be converted
@@ -238,7 +270,7 @@ private:
   void DiagnoseUnknown();
   bool ParseMultilineValue(String& value, bool skip=false);
   std::ifstream fstream_;
-  std::istream& stream_;
+  boost::iostreams::filtering_stream<boost::iostreams::input> stream_;
   String        filename_;
   int           line_num_;
   bool          has_current_line_;
diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt
index f10be7bc4..bcad4b24e 100644
--- a/modules/io/tests/CMakeLists.txt
+++ b/modules/io/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ set(OST_IO_UNIT_TESTS
   test_iomanager.cc
   tests.cc
   test_star_parser.cc
+  test_mmcif_reader.cc
 )
 if (ENABLE_IMG)
   list(APPEND OST_IO_UNIT_TESTS test_io_img.cc)
diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc
new file mode 100644
index 000000000..09541ca29
--- /dev/null
+++ b/modules/io/tests/test_mmcif_reader.cc
@@ -0,0 +1,233 @@
+//------------------------------------------------------------------------------
+// This file is part of the OpenStructure project <www.openstructure.org>
+//
+// Copyright (C) 2008-2011 by the OpenStructure authors
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License as published by the Free
+// Software Foundation; either version 3.0 of the License, or (at your option)
+// any later version.
+// This library is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with this library; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+//------------------------------------------------------------------------------
+
+#include <fstream>
+#include <ost/io/io_exception.hh>
+#include <ost/io/mol/mmcif_reader.hh>
+
+#define BOOST_AUTO_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+
+
+using namespace ost;
+using namespace ost::io;
+
+class TestMMCifParserProtected : MMCifParser {
+public:
+  TestMMCifParserProtected(std::istream& stream, mol::EntityHandle& ent_handle):
+    MMCifParser(stream, ent_handle, IOProfile())
+  { }
+
+  TestMMCifParserProtected(std::istream& stream, mol::EntityHandle& ent_handle,
+                           const IOProfile& profile):
+    MMCifParser(stream, ent_handle, profile)
+  { }
+
+  using MMCifParser::IsValidPDBIdent;
+  using MMCifParser::ParseAtomIdent;
+  using MMCifParser::ParseAndAddAtom;
+  using MMCifParser::EnsureEnoughColumns;
+  using MMCifParser::TryStoreIdx;
+};
+
+BOOST_AUTO_TEST_SUITE( io );
+
+BOOST_AUTO_TEST_CASE(mmcif_isvalidpdbid)
+{
+  mol::EntityHandle eh=mol::CreateEntity();
+
+  // on changing the tests for a PDB id in mmcif files, extend this unit test
+  BOOST_MESSAGE("  Running mmcif_isvalidpdbid tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  TestMMCifParserProtected tmmcif_p(s, eh);
+  StringRef id = StringRef("1FOO", 4);
+  BOOST_MESSAGE("    Testing valid id ('"+ id.str() +"')...");
+  BOOST_CHECK(tmmcif_p.IsValidPDBIdent(id));
+  BOOST_MESSAGE("    done.");
+  id = StringRef("this is to long for a PDB id", 28);
+  BOOST_MESSAGE("    Testing oversized PDB id ('"+ id.str() +"')...");
+  BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id));
+  BOOST_MESSAGE("    done.");
+  id = StringRef("nFOO", 4);
+  BOOST_MESSAGE("    Testing PDB id with missing number ('"+ id.str() +"')...");
+  BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id));
+  BOOST_MESSAGE("    done.");
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_trystoreidx)
+{
+  mol::EntityHandle eh = mol::CreateEntity();
+
+  BOOST_MESSAGE("  Running mmcif_trystoreidx tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  TestMMCifParserProtected tmmcif_p(s, eh, IOProfile());
+  StarLoopDesc mmcif_h;
+  mmcif_h.SetCategory(StringRef("Foo", 3));
+  // negative
+  BOOST_CHECK_THROW(tmmcif_p.TryStoreIdx(0, "bar", mmcif_h), IOException);
+  // positive
+  mmcif_h.Add(StringRef("bar", 3));
+  BOOST_CHECK_NO_THROW(tmmcif_p.TryStoreIdx(0, "bar", mmcif_h));
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_atom_site_header)
+{
+  mol::EntityHandle eh=mol::CreateEntity();
+
+  BOOST_MESSAGE("  Running mmcif_atom_site_header tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  MMCifParser mmcif_p(s, eh, IOProfile());
+  StarLoopDesc mmcif_h;
+  mmcif_h.SetCategory(StringRef("atom_site", 9));
+  BOOST_MESSAGE("    auth_asym_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("auth_asym_id", 12));
+  BOOST_MESSAGE("    id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("id", 2));
+  BOOST_MESSAGE("    label_alt_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_alt_id", 12));
+  BOOST_MESSAGE("    label_asym_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_asym_id", 13));
+  BOOST_MESSAGE("    label_atom_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_atom_id", 13));
+  BOOST_MESSAGE("    label_comp_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_comp_id", 13));
+  BOOST_MESSAGE("    label_entity_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_entity_id", 15));
+  BOOST_MESSAGE("    label_seq_id");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("label_seq_id", 12));
+  BOOST_MESSAGE("    type_symbol");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("type_symbol", 11));
+  BOOST_MESSAGE("    Cartn_x");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("Cartn_x", 7));
+  BOOST_MESSAGE("    Cartn_y");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("Cartn_y", 7));
+  BOOST_MESSAGE("    Cartn_z");
+  BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException);
+  mmcif_h.Add(StringRef("Cartn_z", 7));
+  BOOST_MESSAGE("  done.");
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_ensureenoughcolumns)
+{
+  mol::EntityHandle eh=mol::CreateEntity();
+
+  BOOST_MESSAGE("  Running mmcif_ensureenoughcolumns tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  IOProfile profile;
+  TestMMCifParserProtected tmmcif_p(s, eh, profile);
+  std::vector<StringRef> cols;
+  BOOST_MESSAGE("    testing short atom_site entry");
+  cols.push_back(StringRef("ATOM", 4));
+  BOOST_CHECK_THROW(tmmcif_p.EnsureEnoughColumns(cols, 2), IOException);
+  BOOST_MESSAGE("    testing correct number");
+  BOOST_CHECK(tmmcif_p.EnsureEnoughColumns(cols, 1));
+  BOOST_MESSAGE("    testing fault tolerant profile");
+  profile.fault_tolerant = true;
+  BOOST_CHECK(!tmmcif_p.EnsureEnoughColumns(cols, 2));
+  BOOST_MESSAGE("  done.");
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_parseatomident)
+{
+  mol::EntityHandle eh = mol::CreateEntity();
+
+  BOOST_MESSAGE("  Running mmcif_parseatomident tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  IOProfile profile;
+  TestMMCifParserProtected tmmcif_p(s, eh, profile);
+  std::vector<StringRef> cols;
+  String chain_name;
+  StringRef res_name;
+  mol::ResNum resnum(0);
+  StringRef atom_name;
+  char alt_loc;
+
+  BOOST_MESSAGE("    testing short atom_site entry");
+  // negative
+  cols.push_back(StringRef("ATOM", 4));
+  BOOST_CHECK_THROW(tmmcif_p.ParseAtomIdent(cols,
+                                            chain_name,
+                                            res_name,
+                                            resnum,
+                                            atom_name,
+                                            alt_loc), IOException);
+  // positive
+  StarLoopDesc tmmcif_h;
+  tmmcif_h.SetCategory(StringRef("atom_site", 9));
+  // build header
+  //mmcif_h.Add(StringRef("AUTH_ASYM_ID", 12));
+  /*
+    this->TryStoreIdx(AUTH_ASYM_ID,    "auth_asym_id",    header);
+    this->TryStoreIdx(ID,              "id",              header);
+    this->TryStoreIdx(LABEL_ALT_ID,    "label_alt_id",    header);
+    this->TryStoreIdx(LABEL_ASYM_ID,   "label_asym_id",   header);
+    this->TryStoreIdx(LABEL_ATOM_ID,   "label_atom_id",   header);
+    this->TryStoreIdx(LABEL_COMP_ID,   "label_comp_id",   header);
+    this->TryStoreIdx(LABEL_ENTITY_ID, "label_entity_id", header);
+    this->TryStoreIdx(LABEL_SEQ_ID,    "label_seq_id",    header);
+    this->TryStoreIdx(TYPE_SYMBOL,     "type_symbol",     header);
+    this->TryStoreIdx(CARTN_X, "Cartn_x", header);
+    this->TryStoreIdx(CARTN_Y, "Cartn_y", header);
+    this->TryStoreIdx(CARTN_Z, "Cartn_z", header);
+*/
+
+  BOOST_MESSAGE("  done.");
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_parseandaddatom)
+{
+  mol::EntityHandle eh = mol::CreateEntity();
+
+  BOOST_MESSAGE("  Running mmcif_parseandaddatom tests...");
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  TestMMCifParserProtected tmmcif_p(s, eh, IOProfile());
+  std::vector<StringRef> cols;
+
+  BOOST_MESSAGE("    testing short atom_site entry");
+  cols.push_back(StringRef("ATOM", 4));
+  BOOST_CHECK_THROW(tmmcif_p.ParseAndAddAtom(cols), IOException);
+  BOOST_MESSAGE("  done.");
+}
+
+BOOST_AUTO_TEST_CASE(mmcif_testreader)
+{
+  mol::EntityHandle eh = mol::CreateEntity();
+  std::ifstream s("testfiles/mmcif/atom_site.mmcif");
+  IOProfile profile;
+  MMCifParser mmcif_p(s, eh, profile);
+
+  mmcif_p.Parse();
+
+  BOOST_REQUIRE_EQUAL(eh.GetChainCount(),   2);
+  BOOST_REQUIRE_EQUAL(eh.GetResidueCount(), 4);
+  BOOST_REQUIRE_EQUAL(eh.GetAtomCount(),   25);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
new file mode 100644
index 000000000..6c9a29637
--- /dev/null
+++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif
@@ -0,0 +1,49 @@
+data_1BAR
+
+loop_
+_atom_site.group_PDB
+_atom_site.type_symbol
+_atom_site.label_atom_id
+_atom_site.label_comp_id
+_atom_site.label_asym_id
+_atom_site.label_seq_id
+_atom_site.label_alt_id
+_atom_site.label_entity_id
+_atom_site.Cartn_x
+_atom_site.Cartn_y
+_atom_site.Cartn_z
+_atom_site.occupancy
+_atom_site.B_iso_or_equiv
+_atom_site.footnote_id
+_atom_site.auth_seq_id
+_atom_site.id
+_atom_site.auth_asym_id
+ATOM N  N   VAL  A  11  . 1  25.369  30.691  11.795  1.00  17.93  .  11   1  A
+ATOM C  CA  VAL  A  11  . 1  25.970  31.965  12.332  1.00  17.75  .  11   2  A
+ATOM C  C   VAL  A  11  . 1  25.569  32.010  13.808  1.00  17.83  .  11   3  A
+ATOM O  O   VAL  A  11  . 1  24.735  31.190  14.167  1.00  17.53  .  11   4  A
+ATOM C  CB  VAL  A  11  . 1  25.379  33.146  11.540  1.00  17.66  .  11   5  A
+ATOM C  CG1 VAL  A  11  . 1  25.584  33.034  10.030  1.00  18.86  .  11   6  A
+ATOM C  CG2 VAL  A  11  . 1  23.933  33.309  11.872  1.00  17.12  .  11   7  A
+ATOM N  N   THR  A  12  . 1  26.095  32.930  14.590  1.00  18.97  4  12   8  A
+ATOM C  CA  THR  A  12  . 1  25.734  32.995  16.032  1.00  19.80  4  12   9  A
+ATOM C  C   THR  A  12  . 1  24.695  34.106  16.113  1.00  20.92  4  12  10  A
+ATOM O  O   THR  A  12  . 1  24.869  35.118  15.421  1.00  21.84  4  12  11  A
+ATOM C  CB  THR  A  12  . 1  26.911  33.346  17.018  1.00  20.51  4  12  12  A
+ATOM O  OG1 THR  A  12  3 1  27.946  33.921  16.183  0.50  20.29  4  12  13  A
+ATOM O  OG1 THR  A  12  4 1  27.769  32.142  17.103  0.50  20.59  4  12  14  A
+ATOM C  CG2 THR  A  12  3 1  27.418  32.181  17.878  0.50  20.47  4  12  15  A
+ATOM C  CG2 THR  A  12  4 1  26.489  33.778  18.426  0.50  20.00  4  12  16  A
+ATOM N  N   ILE  A  13  . 1  23.664  33.855  16.884  1.00  22.08  .  13  17  A
+ATOM C  CA  ILE  A  13  . 1  22.623  34.850  17.093  1.00  23.44  .  13  18  A
+ATOM C  C   ILE  A  13  . 1  22.657  35.113  18.610  1.00  25.77  .  13  19  A
+ATOM O  O   ILE  A  13  . 1  23.123  34.250  19.406  1.00  26.28  .  13  20  A
+ATOM C  CB  ILE  A  13  . 1  21.236  34.463  16.492  1.00  22.67  .  13  21  A
+ATOM C  CG1 ILE  A  13  . 1  20.478  33.469  17.371  1.00  22.14  .  13  22  A
+ATOM C  CG2 ILE  A  13  . 1  21.357  33.986  15.016  1.00  21.75  .  13  23  A
+# - - - - data truncated for brevity - - - -
+HETATM C C1 APS  C  14  1  1  4.171  29.012   7.116  0.58  17.27  1 300  101  A
+HETATM C C2 APS  C  14  1  1  4.949  27.758   6.793  0.58  16.95  1 300  102  A
+HETATM O O3 APS  C  14  1  1  4.800  26.678   7.393  0.58  16.85  1 300  103  A
+HETATM N N4 APS  C  14  1  1  5.930  27.841   5.869  0.58  16.43  1 300  104  A
+# - - - - data truncated for brevity - - - -
-- 
GitLab