diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 240c7a6ab52714c8f3f2b5beb1210a88c648e498..78e126def7cf2b67171435e5c2f9efe49ee43de2 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -40,7 +40,10 @@ The following categories of a mmCIF file are considered by the reader: * ``struct_ref`` stored in :class:`MMCifInfoStructRef` * ``struct_ref_seq`` stored in :class:`MMCifInfoStructRefSeq` * ``struct_ref_seq_dif`` stored in :class:`MMCifInfoStructRefDif` -* ``database_pdb_rev`` stored in :class:`MMCifInfoRevisions` +* ``database_PDB_rev`` (mmCIF dictionary version < 5) stored in + :class:`MMCifInfoRevisions` +* ``pdbx_audit_revision_history`` and ``pdbx_audit_revision_details`` + (mmCIF dictionary version >= 5) used to fill :class:`MMCifInfoRevisions` Notes: @@ -257,8 +260,8 @@ of the annotation available. .. method:: SetRevisionsDateOriginal(date) - Set the date, when this entry first entered the PDB. - See :meth:`MMCifInfoRevisions.SetDateOriginal`. + Set the date when this entry first entered the PDB. Ignored if a date was + already set. See :meth:`MMCifInfoRevisions.SetDateOriginal`. .. class:: MMCifInfoCitation @@ -965,7 +968,8 @@ of the annotation available. .. attribute:: first_release Index + 1 of the revision releasing this entry. If the value is 0, was not - set yet. + set yet. Set the first time we encounter a :meth:`GetStatus` value of + "full release" (mmCIF versions < 5) or "Initial release" (versions >= 5). 
:type: :class:`int` diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 5fc9d576bb27ab05c8a59b8ec72cc3e1043fdc3f..49cd8e16c9901a6d2132ca91c2b7d66f7e11f19d 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -675,7 +675,7 @@ private: String replaced_pdb_id_; ///< replaced entry }; -/// \brief container class for information on file revisions (database_pdb_rev) +/// \brief Container class for information on file revisions /// class DLLEXPORT_OST_IO MMCifInfoRevisions { public: @@ -701,19 +701,16 @@ public: { if (num_.size() && (num_.back() >= num)) { std::stringstream ss; - ss << "'num' component of 'database_pdb_rev' category has to increase "; - ss << "with every revision, last was "; - ss << num_.back(); - ss << ", trying to add "; - ss << num; + ss << "Unique ID of revision has to increase with every revision, " + << "last was " << num_.back() << ", trying to add " << num; throw IOException(ss.str()); - } + } num_.push_back(num); date_.push_back(date); status_.push_back(status); - // set first release date if not already occuoied + // remember the first released revision (1-based index) if not set yet if (first_release_ == 0) { - if (status == "full release") { + if (status == "full release" || status == "Initial release") { first_release_ = status_.size(); } } @@ -1023,7 +1020,10 @@ public: /// \param date void SetRevisionsDateOriginal(String date) { - revisions_.SetDateOriginal(date); + // only set once: keep a date that is already there (i.e. not "?") + if (revisions_.GetDateOriginal() == "?") { + revisions_.SetDateOriginal(date); + } } /// \brief Add a revision to history @@ -1051,7 +1051,7 @@ private: Real resolution_; MMCifInfoStructDetails struct_details_; ///< mmCIF struct category MMCifInfoObsolete obsolete_; ///< obsolete/ superseded entry - MMCifInfoRevisions revisions_; ///< database_pdb_rev category + MMCifInfoRevisions revisions_; ///< list of revisions std::vector<MMCifInfoCitation> citations_; ///< list of citations std::vector<MMCifInfoBioUnit> 
biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index bbee2014c766b2c8b6589be70a035855c959e7ae..abd02fe6e153746cc2df08c56cfdb60c698e8880 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -87,6 +87,11 @@ void MMCifReader::ClearState() bu_assemblies_.clear(); helix_list_.clear(); strand_list_.clear(); + his_revision_ordinal_avail_ = false; + det_revision_ordinal_avail_ = false; + revision_dates_.clear(); + revision_types_.clear(); + database_PDB_rev_added_ = false; } void MMCifReader::SetRestrictChains(const String& restrict_chains) @@ -331,6 +336,7 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) indices_[SRSD_DETAILS]=header.GetIndex("details"); cat_available = true; } else if (header.GetCategory()=="database_PDB_rev") { + // THIS IS FOR mmCIF versions < 5 category_ = DATABASE_PDB_REV; // mandatory items this->TryStoreIdx(DPI_NUM, "num", header); @@ -339,6 +345,33 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) indices_[DPI_DATE_ORIGINAL] = header.GetIndex("date_original"); indices_[DPI_STATUS] = header.GetIndex("status"); cat_available = true; + } else if (header.GetCategory()=="pdbx_audit_revision_history") { + // THIS IS FOR mmCIF versions >= 5 + category_ = PDBX_AUDIT_REVISION_HISTORY; + // mandatory items + this->TryStoreIdx(PARH_REVISION_DATE, "revision_date", header); + // optional items + indices_[PARH_ORDINAL] = header.GetIndex("ordinal"); + // TOCHECK: shouldn't ordinal be mandatory?? 
+ his_revision_ordinal_avail_ = (indices_[PARH_ORDINAL] != -1); + cat_available = true; + } else if (header.GetCategory()=="pdbx_audit_revision_details") { + // THIS IS FOR mmCIF versions >= 5 + category_ = PDBX_AUDIT_REVISION_DETAILS; + // mandatory items + this->TryStoreIdx(PARD_TYPE, "type", header); + // optional items + indices_[PARD_REVISION_ORDINAL] = header.GetIndex("revision_ordinal"); + // TOCHECK: shouldn't ordinal be mandatory?? + det_revision_ordinal_avail_ = (indices_[PARD_REVISION_ORDINAL] != -1); + cat_available = true; + } else if (header.GetCategory()=="pdbx_database_status") { + // THIS IS FOR mmCIF versions >= 5 + category_ = PDBX_DATABASE_STATUS; + // optional items + indices_[PDS_RECVD_INITIAL_DEPOSITION_DATE] + = header.GetIndex("recvd_initial_deposition_date"); + cat_available = true; } category_counts_[category_]++; return cat_available; @@ -1352,6 +1385,53 @@ void MMCifReader::ParseDatabasePDBRev(const std::vector<StringRef>& columns) status = StringRef("", 0); } info_.AddRevision(num, date.str(), status.str()); + database_PDB_rev_added_ = true; +} + +void MMCifReader::ParsePdbxAuditRevisionHistory( + const std::vector<StringRef>& columns) { + int num; + StringRef date; + // get ordinal (or next running number: largest key so far + 1) + if (his_revision_ordinal_avail_) { + num = this->TryGetInt(columns[indices_[PARH_ORDINAL]], + "pdbx_audit_revision_history.ordinal"); + } else if (revision_dates_.empty()) { + num = 0; + } else { + num = revision_dates_.rbegin()->first + 1; + } + // get date + date = columns[indices_[PARH_REVISION_DATE]]; + // add to map + revision_dates_[num] = date.str(); +} + +void MMCifReader::ParsePdbxAuditRevisionDetails( + const std::vector<StringRef>& columns) { + int num; + StringRef type; + // get ordinal (or next running number: largest key so far + 1) + if (det_revision_ordinal_avail_) { + num = this->TryGetInt(columns[indices_[PARD_REVISION_ORDINAL]], + "pdbx_audit_revision_details.revision_ordinal"); + } else if (revision_types_.empty()) { + num = 0; + } else { + num = 
revision_types_.rbegin()->first + 1; + } + // get type + type = columns[indices_[PARD_TYPE]]; + // add to map + revision_types_[num] = type.str(); +} + +void MMCifReader::ParsePdbxDatabaseStatus( + const std::vector<StringRef>& columns) { + const int idx = indices_[PDS_RECVD_INITIAL_DEPOSITION_DATE]; + if (idx != -1) { + info_.SetRevisionsDateOriginal(columns[idx].str()); + } } void MMCifReader::OnDataRow(const StarLoopDesc& header, @@ -1430,6 +1510,18 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing database_PDB_rev entry"); this->ParseDatabasePDBRev(columns); break; + case PDBX_AUDIT_REVISION_HISTORY: + LOG_TRACE("processing pdbx_audit_revision_history entry"); + this->ParsePdbxAuditRevisionHistory(columns); + break; + case PDBX_AUDIT_REVISION_DETAILS: + LOG_TRACE("processing pdbx_audit_revision_details entry"); + this->ParsePdbxAuditRevisionDetails(columns); + break; + case PDBX_DATABASE_STATUS: + LOG_TRACE("processing pdbx_database_status entry"); + this->ParsePdbxDatabaseStatus(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", @@ -1689,6 +1781,23 @@ void MMCifReader::OnEndData() // create secondary structure from struct_conf info this->AssignSecStructure(ent_handle_); + // add revision history for new style mmCIFs (only if no old data there) + if (!database_PDB_rev_added_) { + std::map<int, String>::const_iterator rd_it, rt_it; + for (rd_it = revision_dates_.begin(); rd_it != revision_dates_.end(); + ++rd_it) { + // look for status + const int num = rd_it->first; + const String& date = rd_it->second; + if ( his_revision_ordinal_avail_ && det_revision_ordinal_avail_ + && revision_types_.find(num) != revision_types_.end()) { + info_.AddRevision(num, date, revision_types_[num]); + } else { + info_.AddRevision(num, date, "?"); + } + } + } + LOG_INFO("imported " << chain_count_ << " chains, " << residue_count_ << " residues, " diff --git 
a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index a8e35f52cf543ca6f744224d88723caef4d16b42..55a195652092481182c644dd3f936e7b3593b2bc 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -298,6 +298,21 @@ protected: /// \param columns data row void ParseDatabasePDBRev(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF pdbx_audit_revision_history information + /// + /// \param columns data row + void ParsePdbxAuditRevisionHistory(const std::vector<StringRef>& columns); + + /// \brief Fetch mmCIF pdbx_audit_revision_details information + /// + /// \param columns data row + void ParsePdbxAuditRevisionDetails(const std::vector<StringRef>& columns); + + /// \brief Fetch mmCIF pdbx_database_status information + /// + /// \param columns data row + void ParsePdbxDatabaseStatus(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF struct information /// /// \param columns data row @@ -531,6 +546,23 @@ private: DPI_STATUS, ///< status of a revision } DatabasePDBRevItems; + /// \enum items of the pdbx_audit_revision_history category + typedef enum { + PARH_ORDINAL, ///< unique identifier + PARH_REVISION_DATE, ///< revision date + } PdbxAuditRevisionHistoryItems; + + /// \enum items of the pdbx_audit_revision_details category + typedef enum { + PARD_REVISION_ORDINAL, ///< link to pdbx_audit_revision_history.ordinal + PARD_TYPE, ///< type classification of the revision + } PdbxAuditRevisionDetailsItems; + + /// \enum items of the pdbx_database_status category + typedef enum { + PDS_RECVD_INITIAL_DEPOSITION_DATE, ///< date of initial deposition + } PdbxDatabaseStatusItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -551,6 +583,9 @@ private: STRUCT_REF_SEQ, STRUCT_REF_SEQ_DIF, DATABASE_PDB_REV, + PDBX_AUDIT_REVISION_HISTORY, + PDBX_AUDIT_REVISION_DETAILS, + PDBX_DATABASE_STATUS, DONT_KNOW } MMCifCategory; @@ -621,6 +656,12 @@ private: 
MMCifHSVector helix_list_; ///< for storing struct_conf sec.struct. data MMCifHSVector strand_list_; ///< for storing struct_conf sec.struct. data MMCifInfoStructRefs struct_refs_; + // for storing revisions (collected while parsing, used in OnEndData) + bool his_revision_ordinal_avail_; + bool det_revision_ordinal_avail_; + std::map<int, String> revision_dates_; + std::map<int, String> revision_types_; + bool database_PDB_rev_added_; }; }} diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index c5788c9238b030aa538e2629ce10504da94bb06a..83cba518f76fd002405e1a0f789fb43afc4b5f22 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -1352,4 +1352,68 @@ BOOST_AUTO_TEST_CASE(mmcif_test_chain_mappings) BOOST_TEST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_test_revisions_old) +{ + BOOST_TEST_MESSAGE(" Running mmcif_test_revisions_old tests..."); + + // parse the old-style test file (database_PDB_rev category) + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/3IMJ_rev_old.cif"); + IOProfile profile; + MMCifReader mmcif_p(s, eh, profile); + BOOST_REQUIRE_NO_THROW(mmcif_p.Parse()); + const MMCifInfo& info = mmcif_p.GetInfo(); + const MMCifInfoRevisions& revs = info.GetRevisions(); + + // check num, date and status of each revision + BOOST_CHECK_EQUAL(revs.GetSize(), size_t(3)); + BOOST_CHECK_EQUAL(revs.GetNum(0), 1); + BOOST_CHECK_EQUAL(revs.GetDate(0), String("2009-11-17")); + BOOST_CHECK_EQUAL(revs.GetStatus(0), String("full release")); + BOOST_CHECK_EQUAL(revs.GetNum(1), 2); + BOOST_CHECK_EQUAL(revs.GetDate(1), String("2011-07-13")); + BOOST_CHECK_EQUAL(revs.GetStatus(1), String("?")); + BOOST_CHECK_EQUAL(revs.GetNum(2), 3); + BOOST_CHECK_EQUAL(revs.GetDate(2), String("2012-12-12")); + BOOST_CHECK_EQUAL(revs.GetStatus(2), String("?")); + // check original date, last date and first release + BOOST_CHECK_EQUAL(revs.GetDateOriginal(), String("2009-08-10")); + BOOST_CHECK_EQUAL(revs.GetLastDate(), String("2012-12-12")); + BOOST_CHECK_EQUAL(revs.GetFirstRelease(), size_t(1)); + + BOOST_TEST_MESSAGE(" done."); +} + 
+BOOST_AUTO_TEST_CASE(mmcif_test_revisions_new) +{ + BOOST_TEST_MESSAGE(" Running mmcif_test_revisions_new tests..."); + + // parse the new-style test file (pdbx_audit_revision_* categories) + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/3IMJ_rev_new.cif"); + IOProfile profile; + MMCifReader mmcif_p(s, eh, profile); + BOOST_REQUIRE_NO_THROW(mmcif_p.Parse()); + const MMCifInfo& info = mmcif_p.GetInfo(); + const MMCifInfoRevisions& revs = info.GetRevisions(); + + // check num, date and status of each revision + BOOST_CHECK_EQUAL(revs.GetSize(), size_t(3)); + BOOST_CHECK_EQUAL(revs.GetNum(0), 1); + BOOST_CHECK_EQUAL(revs.GetDate(0), String("2009-11-17")); + BOOST_CHECK_EQUAL(revs.GetStatus(0), String("Initial release")); + BOOST_CHECK_EQUAL(revs.GetNum(1), 2); + BOOST_CHECK_EQUAL(revs.GetDate(1), String("2011-07-13")); + BOOST_CHECK_EQUAL(revs.GetStatus(1), String("?")); + BOOST_CHECK_EQUAL(revs.GetNum(2), 3); + BOOST_CHECK_EQUAL(revs.GetDate(2), String("2012-12-12")); + BOOST_CHECK_EQUAL(revs.GetStatus(2), String("?")); + // check original date, last date and first release + BOOST_CHECK_EQUAL(revs.GetDateOriginal(), String("2009-08-10")); + BOOST_CHECK_EQUAL(revs.GetLastDate(), String("2012-12-12")); + BOOST_CHECK_EQUAL(revs.GetFirstRelease(), size_t(1)); + + BOOST_TEST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/testfiles/mmcif/3IMJ_rev_new.cif b/modules/io/tests/testfiles/mmcif/3IMJ_rev_new.cif new file mode 100644 index 0000000000000000000000000000000000000000..bbc66a30194ce5ff62fd84dd7a758728bc35ff29 --- /dev/null +++ b/modules/io/tests/testfiles/mmcif/3IMJ_rev_new.cif @@ -0,0 +1,35 @@ +data_3IMJ +# taken from 3IMJ.cif (new style) +_entry.id 3IMJ +# +_audit_conform.dict_name mmcif_pdbx.dic +_audit_conform.dict_version 5.281 +_audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic +# +_pdbx_database_status.entry_id 3IMJ +_pdbx_database_status.status_code REL +_pdbx_database_status.deposit_site RCSB +_pdbx_database_status.process_site RCSB 
+_pdbx_database_status.recvd_initial_deposition_date 2009-08-10 +_pdbx_database_status.status_code_sf REL +_pdbx_database_status.status_code_mr ? +_pdbx_database_status.SG_entry ? +_pdbx_database_status.status_code_cs ? +_pdbx_database_status.pdb_format_compatible Y +# +loop_ +_pdbx_audit_revision_history.ordinal +_pdbx_audit_revision_history.data_content_type +_pdbx_audit_revision_history.major_revision +_pdbx_audit_revision_history.minor_revision +_pdbx_audit_revision_history.revision_date +1 'Structure model' 1 0 2009-11-17 +2 'Structure model' 1 1 2011-07-13 +3 'Structure model' 1 2 2012-12-12 +# +_pdbx_audit_revision_details.ordinal 1 +_pdbx_audit_revision_details.revision_ordinal 1 +_pdbx_audit_revision_details.data_content_type 'Structure model' +_pdbx_audit_revision_details.provider repository +_pdbx_audit_revision_details.type 'Initial release' +_pdbx_audit_revision_details.description ? diff --git a/modules/io/tests/testfiles/mmcif/3IMJ_rev_old.cif b/modules/io/tests/testfiles/mmcif/3IMJ_rev_old.cif new file mode 100644 index 0000000000000000000000000000000000000000..a85aef8364f1d443c74acbc586f877b7abe56c4f --- /dev/null +++ b/modules/io/tests/testfiles/mmcif/3IMJ_rev_old.cif @@ -0,0 +1,32 @@ +data_3IMJ +# taken from 3IMJ.cif (old style) +_entry.id 3IMJ +# +_audit_conform.dict_name mmcif_pdbx.dic +_audit_conform.dict_version 4.026 +_audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic +# +loop_ +_database_PDB_rev.num +_database_PDB_rev.date +_database_PDB_rev.date_original +_database_PDB_rev.mod_type +_database_PDB_rev.replaces +_database_PDB_rev.status +1 2009-11-17 2009-08-10 0 3IMJ 'full release' +2 2011-07-13 ? 1 3IMJ ? +3 2012-12-12 ? 1 3IMJ ? +# +_pdbx_database_status.entry_id 3IMJ +_pdbx_database_status.status_code REL +_pdbx_database_status.deposit_site RCSB +_pdbx_database_status.process_site RCSB +_pdbx_database_status.status_code_sf REL +_pdbx_database_status.status_code_mr ? +_pdbx_database_status.SG_entry ? 
+_pdbx_database_status.status_code_cs ? +_pdbx_database_status.dep_release_code_coordinates 'HOLD FOR PUBLICATION' +_pdbx_database_status.dep_release_code_struct_fact 'HOLD FOR PUBLICATION' +_pdbx_database_status.dep_release_code_sequence 'RELEASE NOW' + +# NOTE: values modified to match new style numbers