diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 38934cc7943df2c6d1cec5f40dd43c34485368e6..ccde8f725d0a72e1151d674f5adc16ad1f792068 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -41,6 +41,7 @@ The following categories of a mmCIF file are considered by the reader: * ``struct_ref`` stored in :class:`MMCifInfoStructRef` * ``struct_ref_seq`` stored in :class:`MMCifInfoStructRefSeq` * ``struct_ref_seq_dif`` stored in :class:`MMCifInfoStructRefDif` +* ``database_pdb_rev`` stored in :class:`MMCifInfoRevisions` Info Classes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -99,6 +100,14 @@ of the annotation available. .. attribute:: struct_refs Lists all links to external databases in the mmCIF file. + + .. attribute:: revisions + + Stores a simple history of a PDB entry. + + Also available as :meth:`GetRevisions`. May be extended by + :meth:`AddRevision`. + .. method:: AddCitation(citation) Add a citation to the citation list of an info object. @@ -196,6 +205,28 @@ of the annotation available. :type pdb_chain_id: :class:`str` :returns: atom_site.label_asym_id as :class:`str` + .. method:: AddRevision(num, date, status) + + Add a new iteration to the history. + + :param num: database_pdb_rev.num + :type num: :class:`int` + :param date: database_pdb_rev.date + :type date: :class:`str` + :param status: database_pdb_rev.status + :type status: :class:`str` + + .. method:: GetRevisions() + + See :attr:`revisions` + + .. method:: SetRevisionsDateOriginal(date) + + Set the date, when this entry first entered the PDB. + + :param date: database_pdb_rev.date_original + :type date: :class:`str` + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -828,7 +859,86 @@ of the annotation available. :type: :class:`str` +.. class:: MMCifInfoRevisions + + Revision history of a PDB entry. If you find a '?' somewhere, this means + 'not set'. + + .. 
attribute:: date_original + + The date when this entry was seen in PDB for the very first time. This is + not necessarily the release date. + + :type: :class:`str` + + .. attribute:: first_release + + Index + 1 of the revision releasing this entry, 0 if no release was seen. + + :type: :class:`int` + + .. method:: SetDateOriginal(date) + + Set the date when this entry first entered the PDB. + + :param date: database_pdb_rev.date_original + :type date: :class:`str` + + .. method:: GetDateOriginal() + + Retrieve database_pdb_rev.date_original. + + :returns: database_pdb_rev.date_original as :class:`str` in format + 'yyyy-mm-dd' + + .. method:: AddRevision(num, date, status) + + Add a new iteration to the history. + + :param num: database_pdb_rev.num + :type num: :class:`int` + :param date: database_pdb_rev.date + :type date: :class:`str` + :param status: database_pdb_rev.status + :type status: :class:`str` + + .. method:: GetSize() + + :returns: Number of revisions as :class:`int` + + .. method:: GetDate(i) + + :param i: Index of revision + :type i: :class:`int` + :returns: database_pdb_rev.date as :class:`str` + + .. method:: GetNum(i) + + :param i: Index of revision + :type i: :class:`int` + :returns: database_pdb_rev.num as :class:`int` + + .. method:: GetStatus(i) + + :param i: Index of revision + :type i: :class:`int` + :returns: database_pdb_rev.status as :class:`str` + + .. method:: GetLastDate() + + The date of the latest revision. + + :returns: date as :class:`str` + + .. method:: GetFirstRelease() + + Points to the revision releasing the entry. + + :returns: Index + 1 as :class:`int`, 0 if not set + .. LocalWords: cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE .. LocalWords: SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif .. LocalWords: biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym .. LocalWords: auth GetMMCifPDBChainTr AddPDBCMMCifhainTr GetPDBMMCifChainTr +.. LocalWords: GetRevisions AddRevision SetRevisionsDateOriginal GetSize +.. 
LocalWords: GetNum num GetStatus GetLastDate GetFirstRelease diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index f2527066afca1d2d715c97dcc1413276eafa7a6b..78cd7e124f08460009e4f5b98432080e8422ae95 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -261,6 +261,21 @@ void export_mmcif_io() &MMCifInfoObsolete::SetReplacedPDBID) ; + class_<MMCifInfoRevisions>("MMCifInfoRevisions", init<>()) + .def("SetDateOriginal", &MMCifInfoRevisions::SetDateOriginal) + .def("GetDateOriginal", &MMCifInfoRevisions::GetDateOriginal) + .def("AddRevision", &MMCifInfoRevisions::AddRevision) + .def("GetSize", &MMCifInfoRevisions::GetSize) + .def("GetDate", &MMCifInfoRevisions::GetDate) + .def("GetNum", &MMCifInfoRevisions::GetNum) + .def("GetStatus", &MMCifInfoRevisions::GetStatus) + .def("GetLastDate", &MMCifInfoRevisions::GetLastDate) + .def("GetFirstRelease", &MMCifInfoRevisions::GetFirstRelease) + .add_property("date_original", &MMCifInfoRevisions::GetDateOriginal, + &MMCifInfoRevisions::SetDateOriginal) + .add_property("first_release", &MMCifInfoRevisions::GetFirstRelease) + ; + class_<MMCifInfo>("MMCifInfo", init<>()) .def("AddCitation", &MMCifInfo::AddCitation) .def("GetCitations", make_function(&MMCifInfo::GetCitations, @@ -284,6 +299,9 @@ void export_mmcif_io() .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr) .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr) .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr) + .def("SetRevisionsDateOriginal", &MMCifInfo::SetRevisionsDateOriginal) + .def("AddRevision", &MMCifInfo::AddRevision) + .def("GetRevisions", &MMCifInfo::GetRevisions) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, @@ -299,5 +317,6 @@ void export_mmcif_io() return_value_policy<copy_const_reference>())) .add_property("obsolete", 
&MMCifInfo::GetObsoleteInfo, &MMCifInfo::SetObsoleteInfo) + .add_property("revisions", &MMCifInfo::GetRevisions) ; } diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 3eebf54b2369947c9d007e616c4d33c2f968750c..f567af0520381f6b726be63046235e2dd0b9a87a 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -545,7 +545,7 @@ private: /// class DLLEXPORT_OST_IO MMCifInfoObsolete { public: - /// \brief Create an object of information baout an obsolete entry. + /// \brief Create an object of information about an obsolete entry. MMCifInfoObsolete(): date_(""), id_(UNKNOWN), pdb_id_(""), replaced_pdb_id_("") {}; @@ -620,6 +620,95 @@ private: String replaced_pdb_id_; ///< replaced entry }; +/// \brief container class for information on file revisions (database_pdb_rev) +/// +class DLLEXPORT_OST_IO MMCifInfoRevisions { +public: + /// \brief Start recording a revision process. + MMCifInfoRevisions(): date_original_("?"), first_release_(0) {}; + + /// \brief Set date the entry entered PDB. + /// + /// \param date + void SetDateOriginal(String date) { date_original_ = date; } + + /// \brief Get date the entry entered PDB. 
+ /// + /// \return date + String GetDateOriginal() const { return date_original_; } + + /// \brief Add a revision to history, throws IOException if num decreases + /// + /// \param num unique identifier + /// \param date date of revision + /// \param status status of the revision + void AddRevision(int num, String date, String status) + { + if (num_.size() && (num_.back() > num)) { + std::stringstream ss; + ss << "'num' component of 'database_pdb_rev' category has to increase "; + ss << "with every revision, last was "; + ss << num_.back(); + ss << ", trying to add "; + ss << num; + throw IOException(ss.str()); + } + num_.push_back(num); + date_.push_back(date); + status_.push_back(status); + // remember the first "full release": 1-based position, 0 means unset + if (first_release_ == 0) { + if (status == "full release") { + first_release_ = status_.size(); + } + } + } + + /// \brief Get number of revisions stored. + /// + /// \return number + size_t GetSize() const { return num_.size(); } + + /// \brief Get revision date by index in list. + /// + /// \param i position in list, std::out_of_range is thrown if invalid + /// \return date + String GetDate(size_t i) const { return date_.at(i); } + + /// \brief Get revision num by index in list. + /// + /// \param i position in list, std::out_of_range is thrown if invalid + /// \return num + int GetNum(size_t i) const { return num_.at(i); } + + /// \brief Get revision status by index in list. + /// + /// \param i position in list, std::out_of_range is thrown if invalid + /// \return status + String GetStatus(size_t i) const { return status_.at(i); } + + /// \brief Get date of last revision. + /// + /// \return date, '?' (not set) if no revision was added yet + String GetLastDate() const { return date_.empty() ? String("?") : date_.back(); } + + /// \brief Get the index of the full release revision. 
+ /// + /// \return 1-based position of the first full release revision, 0 if none + size_t GetFirstRelease() const + { + return first_release_; + } + +private: + String date_original_; ///< first time seen in PDB + size_t first_release_; ///< 1-based position of full release revision, 0 if unset + std::vector<int> num_; ///< sequential id of revision (gets larger) + std::vector<String> date_; ///< date of revision + std::vector<String> status_; ///< status phrase for this revision +}; + + class MMCifInfoStructRef; class MMCifInfoStructRefSeq; class MMCifInfoStructRefSeqDif; @@ -861,6 +950,32 @@ public: } const MMCifInfoStructRefs& GetStructRefs() const { return struct_refs_; } void SetStructRefs(const MMCifInfoStructRefs& sr) { struct_refs_=sr; } + + /// \brief Set date_original of revisions. + /// + /// \param date + void SetRevisionsDateOriginal(String date) + { + revisions_.SetDateOriginal(date); + } + + /// \brief Add a revision to history + /// + /// \param num unique identifier + /// \param date date of revision + /// \param status status of the revision + void AddRevision(int num, String date, String status) + { + revisions_.AddRevision(num, date, status); + } + + /// \brief Get history + /// + /// \return MMCifInfoRevisions + MMCifInfoRevisions GetRevisions() const + { + return revisions_; + } //protected: private: @@ -869,6 +984,7 @@ private: Real resolution_; MMCifInfoStructDetails struct_details_; ///< mmCIF struct category MMCifInfoObsolete obsolete_; ///< obsolete/ superseded entry + MMCifInfoRevisions revisions_; ///< database_pdb_rev category std::vector<MMCifInfoCitation> citations_; ///< list of citations std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 12494ce3dc715b315650c9c7ef4dd3eb0381c21d..82606170106dd75090cab69161ae1b53f14cf811 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ 
b/modules/io/src/mol/mmcif_reader.cc @@ -307,31 +307,40 @@ bool MMCifReader::OnBeginLoop(const 
StarLoopDesc& header) this->TryStoreIdx(REPLACE_PDB_ID, "replace_pdb_id", header); cat_available = true; } else if (header.GetCategory() == "struct_ref") { - category_ = STRUCT_REF; - this->TryStoreIdx(SR_ENTITY_ID, "entity_id", header); - this->TryStoreIdx(SR_ID, "id", header); - this->TryStoreIdx(SR_DB_NAME, "db_name", header); - this->TryStoreIdx(SR_DB_CODE, "db_code", header); - indices_[SR_DB_ACCESS]=header.GetIndex("pdbx_db_accession"); - cat_available = true; - } else if (header.GetCategory() == "struct_ref_seq") { - category_ = STRUCT_REF_SEQ; - this->TryStoreIdx(SRS_ALIGN_ID, "align_id", header); - this->TryStoreIdx(SRS_STRUCT_REF_ID, "ref_id", header); - this->TryStoreIdx(SRS_ENT_ALIGN_BEG, "seq_align_beg", header); - this->TryStoreIdx(SRS_ENT_ALIGN_END, "seq_align_end", header); - this->TryStoreIdx(SRS_DB_ALIGN_BEG, "db_align_beg", header); - this->TryStoreIdx(SRS_DB_ALIGN_END, "db_align_end", header); + category_ = STRUCT_REF; + this->TryStoreIdx(SR_ENTITY_ID, "entity_id", header); + this->TryStoreIdx(SR_ID, "id", header); + this->TryStoreIdx(SR_DB_NAME, "db_name", header); + this->TryStoreIdx(SR_DB_CODE, "db_code", header); + indices_[SR_DB_ACCESS]=header.GetIndex("pdbx_db_accession"); + cat_available = true; + } else if (header.GetCategory() == "struct_ref_seq") { + category_ = STRUCT_REF_SEQ; + this->TryStoreIdx(SRS_ALIGN_ID, "align_id", header); + this->TryStoreIdx(SRS_STRUCT_REF_ID, "ref_id", header); + this->TryStoreIdx(SRS_ENT_ALIGN_BEG, "seq_align_beg", header); + this->TryStoreIdx(SRS_ENT_ALIGN_END, "seq_align_end", header); + this->TryStoreIdx(SRS_DB_ALIGN_BEG, "db_align_beg", header); + this->TryStoreIdx(SRS_DB_ALIGN_END, "db_align_end", header); indices_[SRS_PDBX_STRAND_ID]=header.GetIndex("pdbx_strand_id"); - cat_available = true; - } else if (header.GetCategory()=="struct_ref_seq_dif") { - category_ = STRUCT_REF_SEQ_DIF; - this->TryStoreIdx(SRSD_ALIGN_ID, "align_id", header); - this->TryStoreIdx(SRSD_SEQ_RNUM, "seq_num", header); - 
this->TryStoreIdx(SRSD_DB_RNUM, "pdbx_seq_db_seq_num", header); - indices_[SRSD_DETAILS]=header.GetIndex("details"); - cat_available = true; - } + cat_available = true; + } else if (header.GetCategory()=="struct_ref_seq_dif") { + category_ = STRUCT_REF_SEQ_DIF; + this->TryStoreIdx(SRSD_ALIGN_ID, "align_id", header); + this->TryStoreIdx(SRSD_SEQ_RNUM, "seq_num", header); + this->TryStoreIdx(SRSD_DB_RNUM, "pdbx_seq_db_seq_num", header); + indices_[SRSD_DETAILS]=header.GetIndex("details"); + cat_available = true; + } else if (header.GetCategory()=="database_PDB_rev") { + category_ = DATABASE_PDB_REV; + // mandatory items + this->TryStoreIdx(DPI_NUM, "num", header); + // optional items + indices_[DPI_DATE] = header.GetIndex("date"); + indices_[DPI_DATE_ORIGINAL] = header.GetIndex("date_original"); + indices_[DPI_STATUS] = header.GetIndex("status"); + cat_available = true; + } category_counts_[category_]++; return cat_available; } @@ -598,7 +607,6 @@ void MMCifReader::ParseAndAddAtom(const std::vector<StringRef>& columns) // record type ah.SetHetAtom(indices_[GROUP_PDB] == -1 ? 
false : columns[indices_[GROUP_PDB]][0]=='H'); - } void MMCifReader::ParseEntity(const std::vector<StringRef>& columns) @@ -1301,6 +1309,30 @@ void MMCifReader::ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& info_.SetObsoleteInfo(obs_data); } +void MMCifReader::ParseDatabasePDBRev(const std::vector<StringRef>& columns) +{ + int num; + StringRef date; + StringRef status; + + num = this->TryGetInt(columns[indices_[DPI_NUM]], "database_PDB_rev.num"); + // 'date' and 'status' are optional items, absent columns are stored as "" + if (indices_[DPI_DATE] != -1) { + date = columns[indices_[DPI_DATE]]; + } else { + date = StringRef("", 0); + } + if (indices_[DPI_DATE_ORIGINAL] != -1) { + info_.SetRevisionsDateOriginal(columns[indices_[DPI_DATE_ORIGINAL]].str()); + } + if (indices_[DPI_STATUS] != -1) { + status = columns[indices_[DPI_STATUS]]; + } else { + status = StringRef("", 0); + } + info_.AddRevision(num, date.str(), status.str()); +} + void MMCifReader::OnDataRow(const StarLoopDesc& header, const std::vector<StringRef>& columns) { @@ -1362,17 +1394,21 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header, this->ParsePdbxDatabasePdbObsSpr(columns); break; case STRUCT_REF: - LOG_TRACE("processing struct_ref entry"); - this->ParseStructRef(columns); - break; + LOG_TRACE("processing struct_ref entry"); + this->ParseStructRef(columns); + break; case STRUCT_REF_SEQ: - LOG_TRACE("processing struct_ref entry"); - this->ParseStructRefSeq(columns); - break; + LOG_TRACE("processing struct_ref entry"); + this->ParseStructRefSeq(columns); + break; case STRUCT_REF_SEQ_DIF: - LOG_TRACE("processing struct_ref entry"); - this->ParseStructRefSeqDif(columns); - break; + LOG_TRACE("processing struct_ref entry"); + this->ParseStructRefSeqDif(columns); + break; + case DATABASE_PDB_REV: + LOG_TRACE("processing database_PDB_rev entry"); + this->ParseDatabasePDBRev(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ 
header.GetCategory() +"' found.", @@ -1424,7 +1460,6 
@@ void MMCifReader::AssignSecStructure(mol::EntityHandle ent) } } - void MMCifReader::ParseStructRef(const std::vector<StringRef>& columns) { String ent_id=columns[indices_[SR_ENTITY_ID]].str(); diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index e9ff3b893f0e8ade4cce29838b79fb8ab89cf871..b8f6daed068fd5c96293d4d8e6442d24a74b3e0b 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -55,6 +55,7 @@ namespace ost { namespace io { /// \li struct_conf /// \li struct_sheet_range /// \li pdbx_database_PDB_obs_spr +/// \li database_PDB_rev class DLLEXPORT_OST_IO MMCifReader : public StarParser { public: /// \brief create a MMCifReader @@ -237,8 +238,8 @@ protected: /// proper handling of our sequence classes, these need to be converted to /// one-letter-codes. Ideally, we would use the canonical SEQRES. This is /// not possible, however, since the PDB assigns multiple one letter codes - /// to some of the residues. To be consistent, we have to do the conversion on - /// our own. + /// to some of the residues. To be consistent, we have to do the conversion + /// on our own. 
String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib); /// \brief Fetch mmCIF citation_author information /// @@ -287,6 +288,11 @@ protected: /// \param columns data row void ParsePdbxStructOperList(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF database_PDB_rev information + /// + /// \param columns data row + void ParseDatabasePDBRev(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF struct information /// /// \param columns data row @@ -420,24 +426,25 @@ private: SR_DB_ACCESS } StructRefItems; - /// \enum items of the struct_ref_seq category - typedef enum { - SRS_ALIGN_ID, - SRS_STRUCT_REF_ID, - SRS_PDBX_STRAND_ID, - SRS_DB_ALIGN_BEG, - SRS_DB_ALIGN_END, - SRS_ENT_ALIGN_BEG, - SRS_ENT_ALIGN_END - } StructRefSeqItems; - - /// \enum items of the struct_ref_seq_dif category - typedef enum { - SRSD_ALIGN_ID, - SRSD_SEQ_RNUM, - SRSD_DB_RNUM, + /// \enum items of the struct_ref_seq category + typedef enum { + SRS_ALIGN_ID, + SRS_STRUCT_REF_ID, + SRS_PDBX_STRAND_ID, + SRS_DB_ALIGN_BEG, + SRS_DB_ALIGN_END, + SRS_ENT_ALIGN_BEG, + SRS_ENT_ALIGN_END + } StructRefSeqItems; + + /// \enum items of the struct_ref_seq_dif category + typedef enum { + SRSD_ALIGN_ID, + SRSD_SEQ_RNUM, + SRSD_DB_RNUM, SRSD_DETAILS - } StructRefSeqDifItems; + } StructRefSeqDifItems; + /// \enum items of the pdbx_struct_assembly_gen category typedef enum { ASSEMBLY_ID, ///< link to pdbx_struct_assembly.id @@ -511,6 +518,14 @@ private: REPLACE_PDB_ID, ///< OLD PDB ID } PdbxDatabasePDBObsSpr; + /// \enum categories of the database_PDB_rev category + typedef enum { + DPI_NUM, ///< unique identifier + DPI_DATE, ///< revision date + DPI_DATE_ORIGINAL, ///< date of first sight + DPI_STATUS, ///< status of a revision + } DatabasePDBRevItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -530,6 +545,7 @@ private: STRUCT_REF, STRUCT_REF_SEQ, STRUCT_REF_SEQ_DIF, + DATABASE_PDB_REV, DONT_KNOW } MMCifCategory; diff --git 
a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 65e4fdaecb42ec54fb50d3c579f24ba8b7ae8adb..3f3e21baf4b8a9e3f10b8f65380338668015bfd3 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -187,6 +187,31 @@ BOOST_AUTO_TEST_CASE(mmcif_info_structdetails) BOOST_MESSAGE(" done."); } +BOOST_AUTO_TEST_CASE(mmcif_info_revisions) +{ + BOOST_MESSAGE(" Running mmcif_info_revisions tests..."); + + MMCifInfoRevisions rev = MMCifInfoRevisions(); + + BOOST_CHECK(rev.GetDateOriginal() == "?"); + + rev.SetDateOriginal("2012-05-04"); + rev.AddRevision(1, "2012-05-04", "in preparation"); + rev.AddRevision(2, "2012-05-05", "full release"); + + BOOST_CHECK(rev.GetSize() == 2); + BOOST_CHECK(rev.GetDateOriginal() == "2012-05-04"); + BOOST_CHECK(rev.GetDate(0) == "2012-05-04"); + BOOST_CHECK(rev.GetNum(0) == 1); + BOOST_CHECK(rev.GetStatus(0) == "in preparation"); + BOOST_CHECK(rev.GetDate(1) == rev.GetLastDate()); + BOOST_CHECK(rev.GetFirstRelease() == 2); + BOOST_CHECK(rev.GetNum(1) == 2); + BOOST_CHECK(rev.GetStatus(1) == "full release"); + + BOOST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_info) { BOOST_MESSAGE(" Running mmcif_info tests..."); @@ -213,6 +238,8 @@ BOOST_AUTO_TEST_CASE(mmcif_info) BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A")); BOOST_CHECK("" == info.GetPDBMMCifChainTr("C")); + BOOST_CHECK(info.GetRevisions().GetSize() == 0); + BOOST_MESSAGE(" done."); }