From aac17c6cf70a13e6aa53792cf162b8d7a4022ef4 Mon Sep 17 00:00:00 2001 From: Stefan Bienert <stefan.bienert@unibas.ch> Date: Wed, 20 Jul 2011 12:12:48 +0200 Subject: [PATCH] First version of a MMCIF Parser, early stage (experimental) --- modules/io/pymod/CMakeLists.txt | 1 + modules/io/pymod/__init__.py | 101 +++- modules/io/pymod/wrap_io.cc | 2 + modules/io/src/mol/CMakeLists.txt | 2 + modules/io/src/mol/mmcif_reader.cc | 550 ++++++++++++++++++ modules/io/src/mol/mmcif_reader.hh | 208 +++++++ modules/io/src/mol/star_parser.cc | 71 ++- modules/io/src/mol/star_parser.hh | 34 +- modules/io/tests/CMakeLists.txt | 1 + modules/io/tests/test_mmcif_reader.cc | 233 ++++++++ .../io/tests/testfiles/mmcif/atom_site.mmcif | 49 ++ 11 files changed, 1248 insertions(+), 4 deletions(-) create mode 100644 modules/io/src/mol/mmcif_reader.cc create mode 100644 modules/io/src/mol/mmcif_reader.hh create mode 100644 modules/io/tests/test_mmcif_reader.cc create mode 100644 modules/io/tests/testfiles/mmcif/atom_site.mmcif diff --git a/modules/io/pymod/CMakeLists.txt b/modules/io/pymod/CMakeLists.txt index 902089f7f..023e000b8 100644 --- a/modules/io/pymod/CMakeLists.txt +++ b/modules/io/pymod/CMakeLists.txt @@ -1,6 +1,7 @@ set(OST_IO_PYMOD_SOURCES wrap_io.cc export_pdb_io.cc + export_mmcif_io.cc ) if (ENABLE_IMG) diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 74047dec0..57a0464bb 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -88,7 +88,7 @@ def LoadPDB(filename, restrict_chains="", no_hetatms=None, profile='DEFAULT', remote=False, dialect=None, strict_hydrogens=None, seqres=False): """ - Load PDB file from disk and returns one or more entities. Several options + Load PDB file from disk and return one or more entities. Several options allow to customize the exact behaviour of the PDB import. For more information on these options, see :doc:`profile`. @@ -264,6 +264,105 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', raise ValueError("No DCD filename given") return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load) +def LoadMMCIF(filename, restrict_chains="", no_hetatms=None, + fault_tolerant=None, load_multi=False, quack_mode=None, + join_spread_atom_records=None, calpha_only=None, + profile='DEFAULT', remote=False, strict_hydrogens=None, + seqres=False): + """ + Load MMCIF file from disk and return one or more entities. Several options + allow to customize the exact behaviour of the MMCIF import. For more + information on these options, see :doc:`profile`. + + Residues are flagged as ligand if they are mentioned in a HET record. + + :param restrict_chains: If not an empty string, only chains listed in the + string will be imported. + + :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides + the value of :attr:`IOProfile.fault_tolerant`. + + :param no_hetatms: If set to True, HETATM records will be ignored. Overrides + the value of :attr:`IOProfile.no_hetatms` + + :param load_multi: If set to True, a list of entities will be returned instead + of only the first. This is useful when dealing with multi-PDB files. + + :param join_spread_atom_records: If set, overrides the value of + :attr:`IOProfile.join_spread_atom_records`. + + :param remote: If set to true, the method tries to load the pdb from the + remote pdb repository www.pdb.org. The filename is then interpreted as the + pdb id. + + :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is + True. + + :param seqres: Whether to read SEQRES records. If set to true, the loaded + entity and seqres entry will be returned as a tuple. + + :param strict_hydrogens: If set, overrides the value of + :attr:`IOProfile.strict_hydrogens`. + + :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or + inexistent file + """ + def _override(val1, val2): + if val2!=None: + return val2 + else: + return val1 + if isinstance(profile, str): + prof = profiles[profile].Copy() + else: + prof = profile.Copy() + + prof.calpha_only=_override(prof.calpha_only, calpha_only) + prof.no_hetatms=_override(prof.no_hetatms, no_hetatms) + prof.quack_mode=_override(prof.quack_mode, quack_mode) + prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens) + prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant) + prof.join_spread_atom_records=_override(prof.join_spread_atom_records, + join_spread_atom_records) + + if remote: + output_dir = tempfile.gettempdir() + if __GetModelFromPDB(filename, output_dir): + filename = os.path.join(output_dir, 'pdb%s.ent.gz' % filename) + else: + raise IOError('Can not load PDB %s from www.pdb.org'%filename) + + conop_inst = conop.Conopology.Instance() + builder = conop_inst.GetBuilder("DEFAULT") + + builder.strict_hydrogens = prof.strict_hydrogens + + try: + #if load_multi: + # ent_list=[] + # while reader.HasNext(): + # ent=mol.CreateEntity() + # reader.Import(ent, restrict_chains) + # conop_inst.ConnectAll(builder, ent, 0) + # ent_list.append(ent) + # if len(ent_list)==0: + # raise IOError("File doesn't contain any entities") + # return ent_list + #else: + ent = mol.CreateEntity() + reader = MMCifParser(filename, ent, prof) + #reader.read_seqres = seqres + #if reader.HasNext(): + reader.Parse() + conop_inst.ConnectAll(builder, ent, 0) + #else: + # raise IOError("File doesn't contain any entities") + #if seqres: + # return ent, reader.seqres + return ent + except: + raise + ## \example fft_li.py # # This scripts loads one or more images and shows their Fourier Transforms on diff --git a/modules/io/pymod/wrap_io.cc b/modules/io/pymod/wrap_io.cc index af5087cd2..1d9e3b936 100644 --- a/modules/io/pymod/wrap_io.cc +++ b/modules/io/pymod/wrap_io.cc @@ -72,6 +72,7 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(save_charmm_trj_ov, } void export_pdb_io(); +void export_mmcif_io(); #if OST_IMG_ENABLED void export_map_io(); #endif @@ -117,6 +118,7 @@ BOOST_PYTHON_MODULE(_ost_io) def("LoadMAE", &LoadMAE); export_pdb_io(); + export_mmcif_io(); #if OST_IMG_ENABLED export_map_io(); #endif diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index 1201eb58f..db328bb9c 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -15,12 +15,14 @@ chemdict_parser.cc io_profile.cc dcd_io.cc star_parser.cc +mmcif_reader.cc PARENT_SCOPE ) set(OST_IO_MOL_HEADERS chemdict_parser.hh star_parser.hh +mmcif_reader.hh io_profile.hh dcd_io.hh entity_io_crd_handler.hh diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc new file mode 100644 index 000000000..535aedbfa --- /dev/null +++ b/modules/io/src/mol/mmcif_reader.cc @@ -0,0 +1,550 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ctype.h> + +//#include <boost/iostreams/filter/gzip.hpp> +//#include <boost/filesystem/convenience.hpp> +//#include <boost/algorithm/string.hpp> +//#include <boost/algorithm/string/trim.hpp> + +//#include <boost/format.hpp> +//#include <boost/lexical_cast.hpp> + +#include <ost/profile.hh> +#include <ost/log.hh> +#include <ost/mol/xcs_editor.hh> +#include <ost/io/mol/mmcif_reader.hh> + +namespace ost { namespace io { + +MMCifParser::MMCifParser(std::istream& stream, mol::EntityHandle& ent_handle, + const IOProfile& profile): + StarParser(stream, true), profile_(profile), ent_handle_(ent_handle) +{ + this->Init(); +} + +MMCifParser::MMCifParser(const String& filename, mol::EntityHandle& ent_handle, + const IOProfile& profile): + StarParser(filename, true), profile_(profile), ent_handle_(ent_handle) +{ + this->Init(); +} + +void MMCifParser::Init() +{ + warned_name_mismatch_ = false; + category_ = DONT_KNOW; + chain_count_ = 0; + record_type_ = ""; + atom_count_ = 0; + residue_count_ = 0; + auth_chain_id_ = false; + go_on_ = true; + //memset(indices_, -1, MAX_ITEMS_IN_ROW * sizeof(int)); + restrict_chains_ = ""; + //curr_chain_ = mol::ChainHandle(); + //curr_residue_ = mol::ResidueHandle(); +} + +void MMCifParser::ClearState() +{ + curr_chain_ = mol::ChainHandle(); + curr_residue_ = mol::ResidueHandle(); + chain_count_ = 0; + residue_count_ = 0; + atom_count_ = 0; + category_ = DONT_KNOW; + record_type_ = ""; + warned_name_mismatch_ = false; +} + +void MMCifParser::SetRestrictChains(const String& restrict_chains) +{ + restrict_chains_ = restrict_chains; +} + +bool MMCifParser::IsValidPDBIdent(const StringRef& pdbid) +{ + if (pdbid.length() == PDBID_LEN && isdigit(pdbid[0])) { + return true; + } + return false; +} + +bool MMCifParser::OnBeginData(const StringRef& data_name) +{ + LOG_DEBUG("MCIFFReader: " << profile_); + Profile profile_import("MMCifReader::OnBeginData"); + + // check for PDB id + if (!this->IsValidPDBIdent(data_name)) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "No valid PDB id found for data block, read instead \'" + + data_name.str() + "\'", + this->GetCurrentLinenum())); + } + + this->ClearState(); + + go_on_ = true; + + return true; +} + +bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) +{ + // set whole array to -1 + memset(indices_, -1, MAX_ITEMS_IN_ROW * sizeof(int)); + // walk through possible categories + if (header.GetCategory() == "atom_site") { + category_ = ATOM_SITE; + // mandatory items + this->TryStoreIdx(AUTH_ASYM_ID, "auth_asym_id", header); + this->TryStoreIdx(ID, "id", header); + this->TryStoreIdx(LABEL_ALT_ID, "label_alt_id", header); + this->TryStoreIdx(LABEL_ASYM_ID, "label_asym_id", header); + this->TryStoreIdx(LABEL_ATOM_ID, "label_atom_id", header); + this->TryStoreIdx(LABEL_COMP_ID, "label_comp_id", header); + this->TryStoreIdx(LABEL_ENTITY_ID, "label_entity_id", header); + this->TryStoreIdx(LABEL_SEQ_ID, "label_seq_id", header); + this->TryStoreIdx(TYPE_SYMBOL, "type_symbol", header); + // assumed mandatory + this->TryStoreIdx(CARTN_X, "Cartn_x", header); + this->TryStoreIdx(CARTN_Y, "Cartn_y", header); + this->TryStoreIdx(CARTN_Z, "Cartn_z", header); + // optional + indices_[PDBX_PDB_INS_CODE] = header.GetIndex("pdbx_PDB_ins_code"); + indices_[OCCUPANCY] = header.GetIndex("occupancy"); + indices_[B_ISO_OR_EQUIV] = header.GetIndex("B_iso_or_equiv"); + indices_[GROUP_PDB] = header.GetIndex("group_PDB"); + return true; + } /*else if (header.GetCategory()=="entity_poly") { + } else if (header.GetCategory()=="pdbx_poly_seq_scheme") { + } else if (header.GetCategory()=="pdbx_struct_assembly") { + } else if (header.GetCategory()=="struct_conf") { + }*/ + return false; +} + +bool MMCifParser::EnsureEnoughColumns(const std::vector<StringRef>& columns, + size_t size) +{ + if (columns.size() < size) { + if (profile_.fault_tolerant) { + return false; + } + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Not enough data fields", + this->GetCurrentLinenum())); + } + return true; +} + +mol::ResNum to_res_num(int num, char ins_code) +{ + return mol::ResNum(num, ins_code==' ' ? '\0' : ins_code); +} + +bool MMCifParser::ParseAtomIdent(const std::vector<StringRef>& columns, + String& chain_name, + StringRef& res_name, + mol::ResNum& resnum, + StringRef& atom_name, + char& alt_loc) +{ + // is checking really neccessary? just by using OnBeginData, enough columns + // should be available! + if (!this->EnsureEnoughColumns(columns, 12)) { + return false; + } + // ATOM name + atom_name = columns[indices_[LABEL_ATOM_ID]]; + if (profile_.calpha_only) { // unit test profile + if (atom_name != StringRef("CA", 2)) { + return false; + } + } + // CHAIN ID + if (auth_chain_id_) { // unit test different IDs + chain_name = columns[indices_[AUTH_ASYM_ID]].str(); + } else { + chain_name = columns[indices_[LABEL_ASYM_ID]].str(); + if (restrict_chains_.size() > 0 && // unit test + restrict_chains_.find(chain_name) == String::npos) { // unit test + return false; + } + } + + std::pair<bool, int> a_num = this->TryGetInt(columns[indices_[ID]], + "_atom_site.id", + profile_.fault_tolerant); // unit test + + alt_loc = columns[indices_[LABEL_ALT_ID]][0]; + res_name = columns[indices_[LABEL_COMP_ID]]; + std::pair<bool, int> res_num =this->TryGetInt(columns[indices_[LABEL_SEQ_ID]], + "_atom_site.label_seq_id", + profile_.fault_tolerant); // unit test + if (!res_num.first) { // unit test + if (profile_.fault_tolerant) { + return false; + } + } + + if (indices_[PDBX_PDB_INS_CODE] != -1) { // unit test + resnum=to_res_num(res_num.second, columns[indices_[PDBX_PDB_INS_CODE]][0]); + } + else { + resnum=to_res_num(res_num.second, ' '); + } + + return true; +} + +void MMCifParser::ParseAndAddAtom(const std::vector<StringRef>& columns) +{ + if (!this->EnsureEnoughColumns(columns, 12)) { + return; + } + mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT); // potbl + char alt_loc=0; + String chain_name; + StringRef res_name, atom_name; + mol::ResNum res_num(0); + if (!this->ParseAtomIdent(columns, + chain_name, + res_name, + res_num, + atom_name, + alt_loc)) { + return; + } + Real occ, temp; + geom::Vec3 apos; + + for (int i = CARTN_X; i <= CARTN_Z; ++i) { + std::pair<bool, float> result = this->TryGetFloat(columns[indices_[i]], + "atom_site.cartn_[xyz]", + profile_.fault_tolerant); + if (!result.first) { // unit test + if (profile_.fault_tolerant) { // unit test + return; + } + } + apos[i - CARTN_X] = result.second; + } + + if (indices_[OCCUPANCY] != -1) { // unit test + occ = this->TryGetReal(columns[indices_[OCCUPANCY]], "atom_site.occupancy"); + } else { + occ = 1.00f; + } + if (indices_[B_ISO_OR_EQUIV] != -1) { // unit test + temp = this->TryGetReal(columns[indices_[B_ISO_OR_EQUIV]], + "atom_site.B_iso_or_equiv"); + } else { + temp = 0; + } + + // determine element + String s_ele(columns[indices_[TYPE_SYMBOL]].str()); + + String aname(atom_name.str()); + // some postprocessing + LOG_TRACE( "s_chain: [" << chain_name << "]" ); + + // determine chain and residue update + bool update_chain=false; + bool update_residue=false; + if(!curr_chain_) { // unit test + update_chain=true; + update_residue=true; + } else if(curr_chain_.GetName() != chain_name) { // unit test + update_chain=true; + update_residue=true; + } + + if(!curr_residue_) { // unit test + update_residue=true; + } else if(curr_residue_.GetNumber() != res_num) { // unit test + update_residue=true; + } + + if(update_chain) { // unit test + curr_chain_ = ent_handle_.FindChain(chain_name); + if(!curr_chain_.IsValid()) { // unit test + LOG_DEBUG("new chain " << chain_name); + curr_chain_=editor.InsertChain(chain_name); + ++chain_count_; + } + assert(curr_chain_.IsValid()); + } + + if(update_residue) { // unit test + curr_residue_=mol::ResidueHandle(); + if (profile_.join_spread_atom_records) { // unit test + curr_residue_=curr_chain_.FindResidue(res_num); + } + if (!curr_residue_.IsValid()) { // unit test + LOG_DEBUG("new residue " << res_name << " " << res_num); + curr_residue_ =editor.AppendResidue(curr_chain_, res_name.str(), res_num); + warned_name_mismatch_=false; + ++residue_count_; + } + assert(curr_residue_.IsValid()); + } + + // finally add atom + LOG_DEBUG("adding atom " << aname << " (" << s_ele << ") @" << apos); + mol::AtomHandle ah; + if (curr_residue_.GetName()!=res_name.str()) { // unit test + if (!profile_.fault_tolerant && alt_loc=='.') { // unit test + std::stringstream ss; + ss << "Residue with number " << res_num << " has more than one name."; + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + ss.str(), + this->GetCurrentLinenum())); + } + if (!warned_name_mismatch_) { // unit test + if (alt_loc=='.') { // unit test + LOG_WARNING("Residue with number " << res_num << " has more than one " + "name. Ignoring atoms for everything but the first"); + } else { + LOG_WARNING("Residue with number " << res_num + << " contains a microheterogeneity. Everything but atoms " + "for the residue '" << curr_residue_.GetName() + << "' will be ignored"); + } + } + warned_name_mismatch_=true; + return; + } + if (alt_loc!='.') { // unit test + // Check if there is already a atom with the same name. + mol::AtomHandle me=curr_residue_.FindAtom(aname); + if (me.IsValid()) { // unit test + try { + editor.AddAltAtomPos(String(1, alt_loc), me, apos); + } catch (Error) { + LOG_INFO("Ignoring atom alt location since there is already an atom " + "with name " << aname << ", but without an alt loc"); + return; + } + return; + } else { + ah = editor.InsertAltAtom(curr_residue_, aname, + String(1, alt_loc), apos, s_ele); + ++atom_count_; + } + } else { + mol::AtomHandle atom=curr_residue_.FindAtom(aname); + if (atom.IsValid() && !profile_.quack_mode) { // unit test + if (profile_.fault_tolerant) { // unit test + LOG_WARNING("duplicate atom '" << aname << "' in residue " + << curr_residue_); + return; + } + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Duplicate atom '"+aname+ + "' in residue "+ + curr_residue_.GetQualifiedName(), + this->GetCurrentLinenum())); + } + ah = editor.InsertAtom(curr_residue_, aname, apos, s_ele); + ++atom_count_; + } + if (indices_[B_ISO_OR_EQUIV] != -1) {// unit test + ah.SetBFactor(temp); + } + if (indices_[OCCUPANCY] != -1) {// unit test + ah.SetOccupancy(occ); + } + + // record type + if (indices_[GROUP_PDB] != -1) { + record_type_ = columns[indices_[GROUP_PDB]].str(); + } else { + record_type_ = "ATOM"; + } + ah.SetHetAtom(record_type_[0]=='H'); +} + +void MMCifParser::OnDataRow(const StarLoopDesc& header, + const std::vector<StringRef>& columns) +{ + switch(category_) { + case ATOM_SITE: + LOG_TRACE("processing atom_site entry"); + this->ParseAndAddAtom(columns); + break; + default: + return; + } +} + + /* +void PDBReader::Import(mol::EntityHandle& ent, + const String& restrict_chains) +{ + do { + switch(curr_line[0]) { + case 'A': + case 'a': + if (IEquals(curr_line.substr(0, 6), StringRef("ANISOU", 6))) { + if (!charmm_style_) { + LOG_TRACE("processing ANISOU entry"); + this->ParseAnisou(curr_line, line_num_, ent); + } + } + break; + case 'C': + case 'c': + if (curr_line.size()<20) { + LOG_TRACE("skipping entry"); + continue; + } + if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { + LOG_TRACE("processing COMPND entry"); + this->ParseCompndEntry(curr_line, line_num_); + } + break; + case 'E': + case 'e': + if (curr_line.size()<3) { + continue; + } + if (IEquals(curr_line.rtrim(), StringRef("END", 3))) { + hard_end_=true; + go_on=false; + break; + } + if (curr_line.size()<6) { + continue; + } + if (IEquals(curr_line.substr(0, 6), StringRef("ENDMDL", 6))) { + go_on=false; + num_model_records_=0; + break; + } + case 'H': + case 'h': + if (curr_line.size()<6) { + continue; + } + if (IEquals(curr_line.substr(0, 6), StringRef("HETATM", 6))) { + if (profile_.no_hetatms) + continue; + LOG_TRACE("processing HETATM entry"); + this->ParseAndAddAtom(curr_line, line_num_, ent, + StringRef("HETATM", 6)); + } else if (IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6))) { + if (!charmm_style_) { + this->ParseHelixEntry(curr_line); + } + } else if (IEquals(curr_line.substr(0, 6), StringRef("HET ", 6))) { + // remember het entry to mark the residues as ligand during ATOM import + char chain=curr_line[12]; + std::pair<bool, int> num=curr_line.substr(13, 4).ltrim().to_int(); + if (!num.first) { + if (profile_.fault_tolerant) { + LOG_WARNING("Invalid HET entry on line " << line_num_); + continue; + } else { + String msg=str(format("Invalid HET entry on line %d")%line_num_); + throw IOException(msg); + } + } + hets_.push_back(HetEntry(chain, to_res_num(num.second, + curr_line[17]))); + } + break; + case 'M': + case 'm': + if (curr_line.size()<6) { + continue; + } + if (IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6))) { + ++num_model_records_; + if (num_model_records_<2) { + continue; + } + if (profile_.fault_tolerant) { + go_on=false; + num_model_records_=1; + break; + } + String msg=str(format("MODEL record without matching ENDMDL on line %d")%line_num_); + throw IOException(msg); + } + break; + case 'S': + case 's': + if (curr_line.size()<6) { + continue; + } + if (IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6))) { + if (!charmm_style_) { + this->ParseStrandEntry(curr_line); + } + } + if (IEquals(curr_line.substr(0, 6), StringRef("SEQRES", 6))) { + if (read_seqres_) { + this->ParseSeqRes(curr_line, line_num_); + } + } + break; + default: + break; + } + } while (std::getline(in_, curr_line_) && ++line_num_ && go_on); + LOG_INFO("imported " + << chain_count_ << " chains, " + << residue_count_ << " residues, " + << atom_count_ << " atoms; with " + << helix_list_.size() << " helices and " + << strand_list_.size() << " strands"); + this->AssignSecStructure(ent); + this->AssignMolIds(ent); + for (HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) { + mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num); + if (res.IsValid()) { + res.SetIsLigand(true); + } + } +} + */ + + + /* + virtual void OnEndLoop() { } + + virtual void OnDataItem(const StarDataItem& item) { } + + };*/ + +void MMCifParser::OnEndData() +{ + LOG_INFO("imported " + << chain_count_ << " chains, " + << residue_count_ << " residues, " + << atom_count_ << " atoms;"); +} + +}} diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh new file mode 100644 index 000000000..20f7bacd9 --- /dev/null +++ b/modules/io/src/mol/mmcif_reader.hh @@ -0,0 +1,208 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MMCIF_PARSER_HH +#define OST_MMCIF_PARSER_HH + +//#include <boost/iostreams/filtering_stream.hpp> +//#include <boost/filesystem/fstream.hpp> + +#include <ost/mol/residue_handle.hh> +#include <ost/io/mol/io_profile.hh> +#include <ost/io/io_exception.hh> +#include <ost/io/mol/star_parser.hh> + +namespace ost { namespace io { + +/// \enum categories of the mmcif format +typedef enum { + ATOM_SITE, + DONT_KNOW +} MMCifCategory; + + +/// \brief reader for the mmcif file format +/// +/// \section mmcif_format mmcif format description/ coverage +/// +/// mmcif is an instance of the \link StarParser STAR format\endlink to store +/// entries of the PDB. The following data categories should be covered by this +/// parser: +/// +/// \li atom_site +class DLLEXPORT_OST_IO MMCifParser : public StarParser { +public: + /// \brief create a MMCifParser + /// + /// \param stream input stream + MMCifParser(std::istream& stream, mol::EntityHandle& ent_handle, + const IOProfile& profile); + + /// \brief create a MMCifParser + /// + /// \param filename input file + MMCifParser(const String& filename, mol::EntityHandle& ent_handle, + const IOProfile& profile); + + /// \brief Initialise the parser. + /// + /// \param loc Location of the file + void Init(); + + /// \brief Set up a fresh instance + void ClearState(); + + /// \brief Set names of restricted chains for the parser. + /// + /// \param restrict_chains chain name + void SetRestrictChains(const String& restrict_chains); + + const String& GetRestrictChains() const + { + return restrict_chains_; + } + + /// \brief check mmcif input to be read. Substitutional function for + /// \link StarParser StarParser\endlink. + /// + /// \param data_name value of the data_ tag + /// + /// \return true, if the blockcode (PDB id) is valid, false otherwise + virtual bool OnBeginData(const StringRef& data_name); + + /// \brief check if a current loop is to be parsed + /// + /// \param header categories of the upcoming loop block + /// + /// \return bool + virtual bool OnBeginLoop(const StarLoopDesc& header); + + /// \brief read a row of data + /// + /// \param header categories and items + /// \param columns data + virtual void OnDataRow(const StarLoopDesc& header, + const std::vector<StringRef>& columns); + + /// \brief Finalise parsing. + virtual void OnEndData(); + + protected: + /// \brief Store an item index from loop header in preparation for reading a + /// row. Throws an exception if the item does not exist. + /// + /// \param mapping position the item index is stored at + /// \param item exact item name to fetch + /// \param header loop header to pull index from + void TryStoreIdx(const int mapping, + const String& item, + const StarLoopDesc& header) + { + indices_[mapping] = header.GetIndex(item); + + if (indices_[mapping] == -1) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "No item '" + item + + "' found in '" + + header.GetCategory()+ + "' header", + this->GetCurrentLinenum())); + } + } + + /// \brief Check a PDB id to be of length 4 and start with a digit + /// + /// \param pdbid putative PDB id + /// + /// \return true for a valid id, false otherwise + bool IsValidPDBIdent(const StringRef& pdbid); + + /// \brief Check no. of columns available. + /// + /// \param columns fields + /// \param size required no. of columns + /// + /// \return true if there are enough columns, false otherwise + bool EnsureEnoughColumns(const std::vector<StringRef>& columns, size_t size); + + /// \brief ... + /// + /// \param columns data row + /// \param atom_name corresponds to label_atom_id + bool ParseAtomIdent(const std::vector<StringRef>& columns, + String& chain_name, + StringRef& res_name, + mol::ResNum& resnum, + StringRef& atom_name, + char& alt_loc); + + /// \brief Fetch atom information and store it. + /// + /// \param columns data row + void ParseAndAddAtom(const std::vector<StringRef>& columns); + +private: + /// \enum magic numbers of this class + typedef enum { + PDBID_LEN=4, ///< length of a PDB id + MAX_ITEMS_IN_ROW=16 ///< count for possible items in a loop row + } MMCifMagicNos; + + /// \enum items of the atom_site category + typedef enum { + AUTH_ASYM_ID, ///< chain name by author as in PDB + ID, ///< atom serial id + LABEL_ALT_ID, ///< AltLoc + LABEL_ASYM_ID, ///< chain name by PDB + LABEL_ATOM_ID, + LABEL_COMP_ID, + LABEL_ENTITY_ID, + LABEL_SEQ_ID, ///< residue no. + TYPE_SYMBOL, ///< chemical element + CARTN_X, ///< Coordinates ||IMPORTANT: This 3 entries have to stay + CARTN_Y, ///< Coordinates ||together for the parser to work! + CARTN_Z, ///< Coordinates || + PDBX_PDB_INS_CODE, + OCCUPANCY, + B_ISO_OR_EQUIV, + GROUP_PDB ///< record name + } AtomSiteItems; + + // members + MMCifCategory category_; + int indices_[MAX_ITEMS_IN_ROW]; ///< map items to values in loops + const IOProfile& profile_; + mol::EntityHandle& ent_handle_; + String restrict_chains_; + bool auth_chain_id_; ///< use chain IDs given by authors rather than pdb + mol::ChainHandle curr_chain_; + mol::ResidueHandle curr_residue_; + int chain_count_; + int residue_count_; + int atom_count_; + bool warned_name_mismatch_; + String record_type_; + bool go_on_; ///< flow control within the parser hooks + //from pdbdreader + //entity als member, fill in ondatarow + //import function +}; + +}} + +#endif diff --git a/modules/io/src/mol/star_parser.cc b/modules/io/src/mol/star_parser.cc index 33e235d09..d0440904e 100644 --- a/modules/io/src/mol/star_parser.cc +++ b/modules/io/src/mol/star_parser.cc @@ -20,28 +20,40 @@ /* Author: Marco Biasini */ +#include <boost/iostreams/filter/gzip.hpp> + #include <cassert> #include <sstream> +#include <ost/log.hh> #include <ost/io/io_exception.hh> #include <ost/io/mol/star_parser.hh> namespace ost { namespace io { StarParser::StarParser(std::istream& stream, bool items_as_row): - stream_(stream), filename_("<stream>"), line_num_(0), + filename_("<stream>"), line_num_(0), has_current_line_(false), current_line_(), items_row_header_(), items_row_columns_(), items_row_values_() { items_as_row_ = items_as_row; + + stream_.push(stream); } StarParser::StarParser(const String& filename, bool items_as_row): - fstream_(filename.c_str()), stream_(fstream_), filename_(filename), + fstream_(filename.c_str()), filename_(filename), line_num_(0), has_current_line_(false), current_line_(), items_row_header_(), items_row_columns_(), items_row_values_() { items_as_row_ = items_as_row; + + if (filename.length() >= 3 && + filename.substr(filename.length() - 3) == ".gz") { + stream_.push(boost::iostreams::gzip_decompressor()); + } + + stream_.push(fstream_); } String StarParser::FormatDiagnostic(StarDiagType type, const String& message, @@ -66,6 +78,18 @@ String StarParser::FormatDiagnostic(StarDiagType type, const String& message, return ss.str(); } +Real StarParser::TryGetReal(const StringRef& data, const String& name) const +{ + std::pair<bool, Real> value = data.to_float(); + if (!value.first) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Expecting real number for " + + name + ", found '" + data.str() + + "' instead.", line_num_)); + } + return value.second; +} + float StarParser::TryGetFloat(const StringRef& data, const String& name) const { std::pair<bool, float> value = data.to_float(); @@ -78,6 +102,28 @@ float StarParser::TryGetFloat(const StringRef& data, const String& name) const return value.second; } +std::pair<bool, float> StarParser::TryGetFloat(const StringRef& data, + const String& name, + bool may_fail) const +{ + std::pair<bool, float> value = data.to_float(); + if (!value.first) { + if (!may_fail) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Expecting floating point value for " + + name + ", found '" + data.str() + + "' instead.", line_num_)); + } + else { + LOG_WARNING(this->FormatDiagnostic(STAR_DIAG_WARNING, + "Expecting floating point value for " + + name + ", found '" + data.str() + + "' instead.", line_num_)); + } + } + return value; +} + int StarParser::TryGetInt(const StringRef& data, const String& name) const { std::pair<bool, int> value = data.to_int(); @@ -90,6 +136,27 @@ int StarParser::TryGetInt(const StringRef& data, const String& name) const return value.second; } +std::pair<bool, int> StarParser::TryGetInt(const StringRef& data, + const String& name, + bool may_fail) const +{ + std::pair<bool, int> value = data.to_int(); + if (!value.first) { + if (!may_fail) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, + "Expecting integer value for " + + name + ", found '" + data.str() + + "' instead.", line_num_)); + } else { + LOG_WARNING(this->FormatDiagnostic(STAR_DIAG_WARNING, + "Expecting integer value for " + + name + ", found '" + data.str() + + "' instead.", line_num_)); + } + } + return value; +} + bool StarParser::TryGetBool(const StringRef& data, const String& name) const { if (data.length() == 1) { diff --git a/modules/io/src/mol/star_parser.hh b/modules/io/src/mol/star_parser.hh index 96da40e73..75feb7adf 100644 --- a/modules/io/src/mol/star_parser.hh +++ b/modules/io/src/mol/star_parser.hh @@ -23,6 +23,8 @@ /* Author: Marco Biasini */ +#include <boost/iostreams/filtering_stream.hpp> + #include <iostream> #include <fstream> #include <vector> @@ -148,6 +150,14 @@ public: /// OnBeginData() returned true. virtual void OnEndData() { } + /// \brief try to convert a value to Real, on failure raise an exception. + /// + /// \param data value to be converted + /// \param name to be included in the message + /// + /// \return converted value + Real TryGetReal(const StringRef& data, const String& name) const; + /// \brief try to convert a value to float, on failure raise an exception. /// /// \param data value to be converted @@ -156,6 +166,17 @@ public: /// \return converted value float TryGetFloat(const StringRef& data, const String& name) const; + /// \brief try to convert a value to float, on failure raise an exception. + /// + /// \param data value to be converted + /// \param name to be included in the message + /// \param may_fail decides if an exception is raised (false) or not (true) + /// + /// \return converted value + std::pair<bool, float> TryGetFloat(const StringRef& data, + const String& name, + bool may_fail) const; + /// \brief try to convert a value to integer, on failure raise an exception. /// /// \param data value to be converted @@ -164,6 +185,17 @@ public: /// \return converted value int TryGetInt(const StringRef& data, const String& name) const; + /// \brief try to convert a value to integer, exception can be turned off. + /// + /// \param data value to be converted + /// \param name to be included in the message + /// \param may_fail decides if an exception is raised (false) or not (true) + /// + /// \return pair with value and indicator if conversion worked + std::pair<bool, int> TryGetInt(const StringRef& data, + const String& name, + bool may_fail) const; + /// \brief try to convert a value to bool, on failure raise an exception. /// /// \param data value to be converted @@ -238,7 +270,7 @@ private: void DiagnoseUnknown(); bool ParseMultilineValue(String& value, bool skip=false); std::ifstream fstream_; - std::istream& stream_; + boost::iostreams::filtering_stream<boost::iostreams::input> stream_; String filename_; int line_num_; bool has_current_line_; diff --git a/modules/io/tests/CMakeLists.txt b/modules/io/tests/CMakeLists.txt index f10be7bc4..bcad4b24e 100644 --- a/modules/io/tests/CMakeLists.txt +++ b/modules/io/tests/CMakeLists.txt @@ -8,6 +8,7 @@ set(OST_IO_UNIT_TESTS test_iomanager.cc tests.cc test_star_parser.cc + test_mmcif_reader.cc ) if (ENABLE_IMG) list(APPEND OST_IO_UNIT_TESTS test_io_img.cc) diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc new file mode 100644 index 000000000..09541ca29 --- /dev/null +++ b/modules/io/tests/test_mmcif_reader.cc @@ -0,0 +1,233 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <fstream> +#include <ost/io/io_exception.hh> +#include <ost/io/mol/mmcif_reader.hh> + +#define BOOST_AUTO_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + + +using namespace ost; +using namespace ost::io; + +class TestMMCifParserProtected : MMCifParser { +public: + TestMMCifParserProtected(std::istream& stream, mol::EntityHandle& ent_handle): + MMCifParser(stream, ent_handle, IOProfile()) + { } + + TestMMCifParserProtected(std::istream& stream, mol::EntityHandle& ent_handle, + const IOProfile& profile): + MMCifParser(stream, ent_handle, profile) + { } + + using MMCifParser::IsValidPDBIdent; + using MMCifParser::ParseAtomIdent; + using MMCifParser::ParseAndAddAtom; + using MMCifParser::EnsureEnoughColumns; + using MMCifParser::TryStoreIdx; +}; + +BOOST_AUTO_TEST_SUITE( io ); + +BOOST_AUTO_TEST_CASE(mmcif_isvalidpdbid) +{ + mol::EntityHandle eh=mol::CreateEntity(); + + // on changing the tests for a PDB id in mmcif files, extend this unit test + BOOST_MESSAGE(" Running mmcif_isvalidpdbid tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + TestMMCifParserProtected tmmcif_p(s, eh); + StringRef id = StringRef("1FOO", 4); + BOOST_MESSAGE(" Testing valid id ('"+ id.str() +"')..."); + BOOST_CHECK(tmmcif_p.IsValidPDBIdent(id)); + BOOST_MESSAGE(" done."); + id = StringRef("this is to long for a PDB id", 28); + BOOST_MESSAGE(" Testing oversized PDB id ('"+ id.str() +"')..."); + BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id)); + BOOST_MESSAGE(" done."); + id = StringRef("nFOO", 4); + BOOST_MESSAGE(" Testing PDB id with missing number ('"+ id.str() +"')..."); + BOOST_CHECK(!tmmcif_p.IsValidPDBIdent(id)); + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_trystoreidx) +{ + mol::EntityHandle eh = mol::CreateEntity(); + + BOOST_MESSAGE(" Running mmcif_trystoreidx tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + TestMMCifParserProtected tmmcif_p(s, eh, IOProfile()); + StarLoopDesc mmcif_h; + mmcif_h.SetCategory(StringRef("Foo", 3)); + // negative + BOOST_CHECK_THROW(tmmcif_p.TryStoreIdx(0, "bar", mmcif_h), IOException); + // positive + mmcif_h.Add(StringRef("bar", 3)); + BOOST_CHECK_NO_THROW(tmmcif_p.TryStoreIdx(0, "bar", mmcif_h)); +} + +BOOST_AUTO_TEST_CASE(mmcif_atom_site_header) +{ + mol::EntityHandle eh=mol::CreateEntity(); + + BOOST_MESSAGE(" Running mmcif_atom_site_header tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + MMCifParser mmcif_p(s, eh, IOProfile()); + StarLoopDesc mmcif_h; + mmcif_h.SetCategory(StringRef("atom_site", 9)); + BOOST_MESSAGE(" auth_asym_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("auth_asym_id", 12)); + BOOST_MESSAGE(" id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("id", 2)); + BOOST_MESSAGE(" label_alt_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_alt_id", 12)); + BOOST_MESSAGE(" label_asym_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_asym_id", 13)); + BOOST_MESSAGE(" label_atom_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_atom_id", 13)); + BOOST_MESSAGE(" label_comp_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_comp_id", 13)); + BOOST_MESSAGE(" label_entity_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_entity_id", 15)); + BOOST_MESSAGE(" label_seq_id"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("label_seq_id", 12)); + BOOST_MESSAGE(" type_symbol"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("type_symbol", 11)); + BOOST_MESSAGE(" Cartn_x"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("Cartn_x", 7)); + BOOST_MESSAGE(" Cartn_y"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("Cartn_y", 7)); + BOOST_MESSAGE(" Cartn_z"); + BOOST_CHECK_THROW(mmcif_p.OnBeginLoop(mmcif_h), IOException); + mmcif_h.Add(StringRef("Cartn_z", 7)); + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_ensureenoughcolumns) +{ + mol::EntityHandle eh=mol::CreateEntity(); + + BOOST_MESSAGE(" Running mmcif_ensureenoughcolumns tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + IOProfile profile; + TestMMCifParserProtected tmmcif_p(s, eh, profile); + std::vector<StringRef> cols; + BOOST_MESSAGE(" testing short atom_site entry"); + cols.push_back(StringRef("ATOM", 4)); + BOOST_CHECK_THROW(tmmcif_p.EnsureEnoughColumns(cols, 2), IOException); + BOOST_MESSAGE(" testing correct number"); + BOOST_CHECK(tmmcif_p.EnsureEnoughColumns(cols, 1)); + BOOST_MESSAGE(" testing fault tolerant profile"); + profile.fault_tolerant = true; + BOOST_CHECK(!tmmcif_p.EnsureEnoughColumns(cols, 2)); + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_parseatomident) +{ + mol::EntityHandle eh = mol::CreateEntity(); + + BOOST_MESSAGE(" Running mmcif_parseatomident tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + IOProfile profile; + TestMMCifParserProtected tmmcif_p(s, eh, profile); + std::vector<StringRef> cols; + String chain_name; + StringRef res_name; + mol::ResNum resnum(0); + StringRef atom_name; + char alt_loc; + + BOOST_MESSAGE(" testing short atom_site entry"); + // negative + cols.push_back(StringRef("ATOM", 4)); + BOOST_CHECK_THROW(tmmcif_p.ParseAtomIdent(cols, + chain_name, + res_name, + resnum, + atom_name, + alt_loc), IOException); + // positive + StarLoopDesc tmmcif_h; + tmmcif_h.SetCategory(StringRef("atom_site", 9)); + // build header + //mmcif_h.Add(StringRef("AUTH_ASYM_ID", 12)); + /* + this->TryStoreIdx(AUTH_ASYM_ID, "auth_asym_id", header); + this->TryStoreIdx(ID, "id", header); + this->TryStoreIdx(LABEL_ALT_ID, "label_alt_id", header); + this->TryStoreIdx(LABEL_ASYM_ID, "label_asym_id", header); + this->TryStoreIdx(LABEL_ATOM_ID, "label_atom_id", header); + this->TryStoreIdx(LABEL_COMP_ID, "label_comp_id", header); + this->TryStoreIdx(LABEL_ENTITY_ID, "label_entity_id", header); + this->TryStoreIdx(LABEL_SEQ_ID, "label_seq_id", header); + this->TryStoreIdx(TYPE_SYMBOL, "type_symbol", header); + this->TryStoreIdx(CARTN_X, "Cartn_x", header); + this->TryStoreIdx(CARTN_Y, "Cartn_y", header); + this->TryStoreIdx(CARTN_Z, "Cartn_z", header); +*/ + + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_parseandaddatom) +{ + mol::EntityHandle eh = mol::CreateEntity(); + + BOOST_MESSAGE(" Running mmcif_parseandaddatom tests..."); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + TestMMCifParserProtected tmmcif_p(s, eh, IOProfile()); + std::vector<StringRef> cols; + + BOOST_MESSAGE(" testing short atom_site entry"); + cols.push_back(StringRef("ATOM", 4)); + BOOST_CHECK_THROW(tmmcif_p.ParseAndAddAtom(cols), IOException); + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_CASE(mmcif_testreader) +{ + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + IOProfile profile; + MMCifParser mmcif_p(s, eh, profile); + + mmcif_p.Parse(); + + BOOST_REQUIRE_EQUAL(eh.GetChainCount(), 2); + BOOST_REQUIRE_EQUAL(eh.GetResidueCount(), 4); + BOOST_REQUIRE_EQUAL(eh.GetAtomCount(), 25); +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif new file mode 100644 index 000000000..6c9a29637 --- /dev/null +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -0,0 +1,49 @@ +data_1BAR + +loop_ +_atom_site.group_PDB +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.label_seq_id +_atom_site.label_alt_id +_atom_site.label_entity_id +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.footnote_id +_atom_site.auth_seq_id +_atom_site.id +_atom_site.auth_asym_id +ATOM N N VAL A 11 . 1 25.369 30.691 11.795 1.00 17.93 . 11 1 A +ATOM C CA VAL A 11 . 1 25.970 31.965 12.332 1.00 17.75 . 11 2 A +ATOM C C VAL A 11 . 1 25.569 32.010 13.808 1.00 17.83 . 11 3 A +ATOM O O VAL A 11 . 1 24.735 31.190 14.167 1.00 17.53 . 11 4 A +ATOM C CB VAL A 11 . 1 25.379 33.146 11.540 1.00 17.66 . 11 5 A +ATOM C CG1 VAL A 11 . 1 25.584 33.034 10.030 1.00 18.86 . 11 6 A +ATOM C CG2 VAL A 11 . 1 23.933 33.309 11.872 1.00 17.12 . 11 7 A +ATOM N N THR A 12 . 1 26.095 32.930 14.590 1.00 18.97 4 12 8 A +ATOM C CA THR A 12 . 1 25.734 32.995 16.032 1.00 19.80 4 12 9 A +ATOM C C THR A 12 . 1 24.695 34.106 16.113 1.00 20.92 4 12 10 A +ATOM O O THR A 12 . 1 24.869 35.118 15.421 1.00 21.84 4 12 11 A +ATOM C CB THR A 12 . 1 26.911 33.346 17.018 1.00 20.51 4 12 12 A +ATOM O OG1 THR A 12 3 1 27.946 33.921 16.183 0.50 20.29 4 12 13 A +ATOM O OG1 THR A 12 4 1 27.769 32.142 17.103 0.50 20.59 4 12 14 A +ATOM C CG2 THR A 12 3 1 27.418 32.181 17.878 0.50 20.47 4 12 15 A +ATOM C CG2 THR A 12 4 1 26.489 33.778 18.426 0.50 20.00 4 12 16 A +ATOM N N ILE A 13 . 1 23.664 33.855 16.884 1.00 22.08 . 13 17 A +ATOM C CA ILE A 13 . 1 22.623 34.850 17.093 1.00 23.44 . 13 18 A +ATOM C C ILE A 13 . 1 22.657 35.113 18.610 1.00 25.77 . 13 19 A +ATOM O O ILE A 13 . 1 23.123 34.250 19.406 1.00 26.28 . 13 20 A +ATOM C CB ILE A 13 . 1 21.236 34.463 16.492 1.00 22.67 . 13 21 A +ATOM C CG1 ILE A 13 . 1 20.478 33.469 17.371 1.00 22.14 . 13 22 A +ATOM C CG2 ILE A 13 . 1 21.357 33.986 15.016 1.00 21.75 . 13 23 A +# - - - - data truncated for brevity - - - - +HETATM C C1 APS C 14 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 A +HETATM C C2 APS C 14 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 A +HETATM O O3 APS C 14 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 A +HETATM N N4 APS C 14 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 A +# - - - - data truncated for brevity - - - - -- GitLab