From 0885e63ee5dbe782c02a1b20064195830f3b5a28 Mon Sep 17 00:00:00 2001 From: Xavier Robin <xavier.robin@unibas.ch> Date: Fri, 19 Jul 2024 09:05:53 +0200 Subject: [PATCH] Document and log about unread models in mmCIF reader OST only reads the first model (_atom_site.pdbx_PDB_model_num) of an mmCIF file. This commit clarifies it in the doc, and adds a warning to the output if models are ignored. --- modules/io/doc/mmcif.rst | 3 +++ modules/io/pymod/__init__.py | 7 ++++--- modules/io/src/mol/mmcif_reader.cc | 14 ++++++++++---- modules/io/src/mol/mmcif_reader.hh | 1 + 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 1c7d17c98..5c1874454 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -105,6 +105,9 @@ Notes: It is a known limitation of the mmCIF format to allow ambiguous identifiers for waters (and ligands to some extent) and so we have to require these additional identifiers. +* An mmCIF file can contain several models (``_atom_site.pdbx_PDB_model_num``). + Only the first model occurring in the mmCIF file is read (regardless of the + actual model number). If extra models are ignored, a warning is logged. Info Classes diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 66a518def..ea3be23a8 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -348,9 +348,10 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False): """ - Load a mmCIF file and return one or more entities. Several options allow to - customize the exact behaviour of the mmCIF import. For more information on - these options, see :doc:`profile`. + Load an mmCIF file and return the first model as an entity. + + Several options allow to customize the exact behaviour of the mmCIF import. 
+ For more information on these options, see :doc:`profile`. Residues are flagged as ligand if they are not waters nor covered by an ``entity_poly`` record (ie. they are non-polymer entities in diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index fd440812e..5ebb954dc 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -478,15 +478,21 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) mol::ResNum res_num(0); bool valid_res_num = false; if (indices_[PDBX_PDB_MODEL_NUM] != -1) { + int model_id = TryGetInt(columns[indices_[PDBX_PDB_MODEL_NUM]], + "atom_site.pdbx_PDB_model_num"); if (has_model_) { - if (curr_model_ != TryGetInt(columns[indices_[PDBX_PDB_MODEL_NUM]], - "atom_site.pdbx_PDB_model_num")) { + if (curr_model_ != model_id) { + if (warned_ignored_model_.find(model_id) == warned_ignored_model_.end()) { + LOG_WARNING("Ignorning new model " << model_id << + ". Only model " << curr_model_ << " was read."); + warned_ignored_model_.insert(model_id); + } return; } } else { + LOG_INFO("Reading model " << model_id << "."); has_model_ = true; - curr_model_ = TryGetInt(columns[indices_[PDBX_PDB_MODEL_NUM]], - "atom_site.pdbx_PDB_model_num"); + curr_model_ = model_id; } } diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 23cfc2f4b..8521c048d 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -691,6 +691,7 @@ private: String subst_res_id_; ///< work around for missing label_seq_id's bool has_model_; ///< keep track of models through different atom_sites int curr_model_; ///< if we have pdbx_PDB_model_num, store no. + std::set<int> warned_ignored_model_; // keep track of ignored model warnings std::vector<std::pair<mol::ChainHandle, String> > chain_id_pairs_; ///< chain and label_entity_id MMCifEntityDescMap entity_desc_map_; ///< stores entity items -- GitLab