diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index 0d8f0e43e7bb450778e07bcee399a90e6a6aa0bd..22af5e694cf5224f48b539889d1379a0ccc3f280 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -36,7 +36,8 @@ The following categories of a mmCIF file are considered by the reader: in the :class:`entity <ost.mol.EntityHandle>` * ``struct_sheet_range``: Stores secondary structure information for sheets in the :class:`entity <ost.mol.EntityHandle>` - +* ``pdbx_database_PDB_obs_spr``: Verbose information on obsoleted/superseded + entries, stored in :class:`MMCifInfoObsolete`. Info Classes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -576,3 +577,68 @@ of the annotation available. .. method:: SetModelTypeDetails(details) See :attr:`model_type_details` + +.. class:: MMCifInfoObsolete + + Holds details on obsolete/superseded structures. + + .. attribute:: date + + Date on which the entry was replaced. + + Also available as :meth:`GetDate`. May also be modified by + :meth:`SetDate`. + + .. attribute:: id + + Type of change. Either *Obsolete* or *Supersede*. Returns a string starting + with an upper-case letter. Has to be set via ``OBSLTE`` or ``SPRSDE``. + + Also available as :meth:`GetID`. May also be modified by + :meth:`SetID`. + + .. attribute:: pdb_id + + ID of the replacing entry. + + Also available as :meth:`GetPDBID`. May also be modified by + :meth:`SetPDBID`. + + .. attribute:: replace_pdb_id + + ID of the replaced entry. + + Also available as :meth:`GetReplacedPDBID`. May also be modified by + :meth:`SetReplacedPDBID`. + + .. method:: GetDate() + + See :attr:`date` + + .. method:: SetDate(date) + + See :attr:`date` + + .. method:: GetID() + + See :attr:`id` + + .. method:: SetID(id) + + See :attr:`id` + + .. method:: GetPDBID() + + See :attr:`pdb_id` + + .. method:: SetPDBID(pdb_id) + + See :attr:`pdb_id` + + .. method:: GetReplacedPDBID() + + See :attr:`replace_pdb_id` + + .. 
method:: SetReplacedPDBID(replaced_pdb_id) + + See :attr:`replace_pdb_id` diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 1262328bff906df84c2c588f69b12b097e09bbc3..4479c232ac6c83409aff686a78cdf32971bad2d9 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -195,7 +195,26 @@ void export_mmcif_io() .add_property("model_type_details", &MMCifInfoStructDetails::GetModelTypeDetails, &MMCifInfoStructDetails::SetModelTypeDetails) -; + ; + + class_<MMCifInfoObsolete>("MMCifInfoObsolete", init<>()) + .def("SetDate", &MMCifInfoObsolete::SetDate) + .def("GetDate", &MMCifInfoObsolete::GetDate) + .def("SetID", &MMCifInfoObsolete::SetID) + .def("GetID", &MMCifInfoObsolete::GetID) + .def("SetPDBID", &MMCifInfoObsolete::SetPDBID) + .def("GetPDBID", &MMCifInfoObsolete::GetPDBID) + .def("SetReplacedPDBID", &MMCifInfoObsolete::SetReplacedPDBID) + .def("GetReplacedPDBID", &MMCifInfoObsolete::GetReplacedPDBID) + .add_property("date", &MMCifInfoObsolete::GetDate, + &MMCifInfoObsolete::SetDate) + .add_property("id", &MMCifInfoObsolete::GetID, + &MMCifInfoObsolete::SetID) + .add_property("pdb_id", &MMCifInfoObsolete::GetPDBID, + &MMCifInfoObsolete::SetPDBID) + .add_property("replace_pdb_id", &MMCifInfoObsolete::GetReplacedPDBID, + &MMCifInfoObsolete::SetReplacedPDBID) + ; class_<MMCifInfo>("MMCifInfo", init<>()) .def("AddCitation", &MMCifInfo::AddCitation) @@ -214,6 +233,8 @@ void export_mmcif_io() return_value_policy<copy_const_reference>())) .def("SetStructDetails", &MMCifInfo::SetStructDetails) .def("GetStructDetails", &MMCifInfo::GetStructDetails) + .def("SetObsoleteInfo", &MMCifInfo::SetObsoleteInfo) + .def("GetObsoleteInfo", &MMCifInfo::GetObsoleteInfo) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, @@ -225,5 +246,7 @@ void export_mmcif_io() 
return_value_policy<copy_const_reference>())) .add_property("struct_details", &MMCifInfo::GetStructDetails, &MMCifInfo::SetStructDetails) + .add_property("obsolete", &MMCifInfo::GetObsoleteInfo, + &MMCifInfo::SetObsoleteInfo) ; } diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index 3e9400194f8c65e735bf5207330a5807b1fd0b12..469e52188cb68f92a2e0403438979537f45616e5 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -526,6 +526,85 @@ private: std::vector<String> authors_; ///< author information }; +/// \brief container class for information on obsolete entries +/// +class DLLEXPORT_OST_IO MMCifInfoObsolete { +public: + /// \brief Create an object of information about an obsolete entry. + MMCifInfoObsolete(): date_(""), id_(UNKNOWN), pdb_id_(""), + replaced_pdb_id_("") {}; + + /// \brief Set date of replacement. + /// + /// \param date + void SetDate(String date) { date_ = date; } + + /// \brief Get the date string. + /// + /// \return date as string. + String GetDate() { return date_; } + + /// \brief Set type of entry. + /// + /// \param type "OBSLTE" or "SPRSDE"; other values leave the type unchanged + void SetID(StringRef type) + { + if (type == StringRef("OBSLTE", 6)) { + id_ = OBSLTE; + } + else if (type == StringRef("SPRSDE", 6)) { + id_ = SPRSDE; + } + } + + /// \brief Get type of entry. + /// + /// \return type as string, starting with an upper case letter. + String GetID() + { + if (id_ == OBSLTE) { + return "Obsolete"; + } + if (id_ == SPRSDE) { + return "Supersede"; + } + return "Unknown"; + } + + /// \brief Set id of replacement. + /// + /// \param id + void SetPDBID(String id) { pdb_id_ = id; } + + /// \brief Get id of replacement. + /// + /// \return id + String GetPDBID() { return pdb_id_; } + + /// \brief Set id of replaced entry. + /// + /// \param id + void SetReplacedPDBID(String id) { replaced_pdb_id_ = id; } + + /// \brief Get id of replaced entry. 
+ /// + /// \return id + String GetReplacedPDBID() { return replaced_pdb_id_; } + +private: + /// \enum types of obsolete entries + typedef enum { + OBSLTE, + SPRSDE, + UNKNOWN + } MMCifObsoleteType; + + String date_; ///< date of replacement + MMCifObsoleteType id_; ///< type of entry + String pdb_id_; ///< replacing entry + String replaced_pdb_id_; ///< replaced entry +}; + /// \brief container class for additional information from MMCif files /// /// \section mmcif annotation information @@ -534,6 +613,11 @@ private: /// This class is set up to capture some of it. In detail, we have: /// /// \li citations +/// \li biounits +/// \li transformation information from asym. unit to biounit +/// \li structure information +/// \li resolution +/// \li method class DLLEXPORT_OST_IO MMCifInfo { public: /// \brief Create an info object. @@ -632,6 +716,22 @@ public: return struct_details_; } + /// \brief Set the information on the obsolete/superseded entry + /// + /// \param obsolete + void SetObsoleteInfo(MMCifInfoObsolete obsolete) + { + obsolete_ = obsolete; + } + + /// \brief Get the information on the obsolete/superseded entry + /// + /// \return MMCifInfoObsolete object + MMCifInfoObsolete GetObsoleteInfo() const + { + return obsolete_; + } + //protected: private: @@ -639,6 +739,7 @@ private: String exptl_method_; Real resolution_; MMCifInfoStructDetails struct_details_; ///< mmCIF struct category + MMCifInfoObsolete obsolete_; ///< obsolete/superseded entry std::vector<MMCifInfoCitation> citations_; ///< list of citations std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 1e48f28d32585e3720cfec6ebb33faf3a94b34f2..09fa8b6efeb2f2be3bd78c0c3da95a6d5a393c33 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -292,6 +292,14 @@ bool MMCifReader::OnBeginLoop(const StarLoopDesc& header) 
indices_[SSR_BEG_AUTH_ASYM_ID] = header.GetIndex("beg_auth_asym_id"); indices_[SSR_END_AUTH_ASYM_ID] = header.GetIndex("end_auth_asym_id"); cat_available = true; + } else if (header.GetCategory() == "pdbx_database_PDB_obs_spr") { + category_ = PDBX_DATABASE_PDB_OBS_SPR; + // mandatory items + this->TryStoreIdx(DATE, "date", header); + this->TryStoreIdx(PDPOS_ID, "id", header); + this->TryStoreIdx(PDB_ID, "pdb_id", header); + this->TryStoreIdx(REPLACE_PDB_ID, "replace_pdb_id", header); + cat_available = true; } category_counts_[category_]++; return cat_available; @@ -1247,6 +1255,19 @@ void MMCifReader::ParseStructSheetRange(const std::vector<StringRef>& columns) } } +void MMCifReader::ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& + columns) +{ + MMCifInfoObsolete obs_data = MMCifInfoObsolete(); + + obs_data.SetDate(columns[indices_[DATE]].str()); + obs_data.SetID(columns[indices_[PDPOS_ID]]); + obs_data.SetPDBID(columns[indices_[PDB_ID]].str()); + obs_data.SetReplacedPDBID(columns[indices_[REPLACE_PDB_ID]].str()); + + info_.SetObsoleteInfo(obs_data); +} + void MMCifReader::OnDataRow(const StarLoopDesc& header, const std::vector<StringRef>& columns) { @@ -1303,6 +1324,10 @@ void MMCifReader::OnDataRow(const StarLoopDesc& header, LOG_TRACE("processing struct_sheet_range entry") this->ParseStructSheetRange(columns); break; + case PDBX_DATABASE_PDB_OBS_SPR: + LOG_TRACE("processing pdbx_database_PDB_obs_spr entry") + this->ParsePdbxDatabasePdbObsSpr(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index 130da8ea93f23fbd1f801d021ad07f620e113fdf..af12c813ac0b8146f2e76043ee67b6d545ff61e5 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -54,6 +54,7 @@ namespace ost { namespace io { /// \li struct /// \li struct_conf /// \li struct_sheet_range 
+/// \li pdbx_database_PDB_obs_spr class DLLEXPORT_OST_IO MMCifReader : public StarParser { public: /// \brief create a MMCifReader @@ -153,7 +154,7 @@ public: return read_seqres_; } - /// \brief Get additional information of the MMCif file. + /// \brief Get additional information of the mmCIF file. /// /// \return MMCitfInfo object const MMCifInfo& GetInfo() { return info_; } @@ -213,17 +214,17 @@ protected: /// \param columns data row void ParseAndAddAtom(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif entity information + /// \brief Fetch mmCIF entity information /// /// \param columns data row void ParseEntity(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif entity_poly information + /// \brief Fetch mmCIF entity_poly information /// /// \param columns data row void ParseEntityPoly(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif citation information + /// \brief Fetch mmCIF citation information /// /// \param columns data row void ParseCitation(const std::vector<StringRef>& columns); @@ -237,27 +238,27 @@ protected: /// to some of the residues. To be consistent, we have to do the conversion on /// our own. 
String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib); - /// \brief Fetch MMCif citation_author information + /// \brief Fetch mmCIF citation_author information /// /// \param columns data row void ParseCitationAuthor(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif exptl information + /// \brief Fetch mmCIF exptl information /// /// \param columns data row void ParseExptl(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif refine information + /// \brief Fetch mmCIF refine information /// /// \param columns data row void ParseRefine(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif pdbx_struct_assembly information + /// \brief Fetch mmCIF pdbx_struct_assembly information /// /// \param columns data row void ParsePdbxStructAssembly(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif pdbx_struct_assembly_gen information + /// \brief Fetch mmCIF pdbx_struct_assembly_gen information /// /// \param columns data row void ParsePdbxStructAssemblyGen(const std::vector<StringRef>& columns); @@ -271,26 +272,31 @@ protected: void StoreRange(const char*& l, const char* s, bool& is_range, int& lborder, std::vector<String>& single_block); - /// \brief Fetch MMCif pdbx_struct_oper_list information + /// \brief Fetch mmCIF pdbx_struct_oper_list information /// /// \param columns data row void ParsePdbxStructOperList(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif struct information + /// \brief Fetch mmCIF struct information /// /// \param columns data row void ParseStruct(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif struct_conf (secondary structure) information + /// \brief Fetch mmCIF struct_conf (secondary structure) information /// /// \param columns data row void ParseStructConf(const std::vector<StringRef>& columns); - /// \brief Fetch MMCif struct_sheet_range (beta sheets) information + /// \brief Fetch mmCIF struct_sheet_range (beta sheets) information /// 
/// \param columns data row void ParseStructSheetRange(const std::vector<StringRef>& columns); + /// \brief Fetch mmCIF pdbx_database_PDB_obs_spr information + /// + /// \param columns data row + void ParsePdbxDatabasePdbObsSpr(const std::vector<StringRef>& columns); + /// \struct types of secondary structure typedef enum { MMCIF_HELIX, @@ -460,6 +466,14 @@ private: SSR_END_AUTH_ASYM_ID, ///< alternative end, (atom_site.auth_asym_id) } StructSheetRangeItems; + /// \enum items of the pdbx_database_PDB_obs_spr category + typedef enum { + DATE, ///< date of replacement + PDPOS_ID, ///< type of obsolete of this entry + PDB_ID, ///< NEW PDB ID + REPLACE_PDB_ID, ///< OLD PDB ID + } PdbxDatabasePDBObsSpr; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, @@ -475,6 +489,7 @@ private: STRUCT, STRUCT_CONF, STRUCT_SHEET_RANGE, + PDBX_DATABASE_PDB_OBS_SPR, DONT_KNOW } MMCifCategory; diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py index 64caccbe1d6ee51b9faa2e7583a8740d188b70d3..82c0be2e818eb7a9665da2002e32f730aee9c2c8 100644 --- a/modules/io/tests/test_io_mmcif.py +++ b/modules/io/tests/test_io_mmcif.py @@ -135,6 +135,25 @@ class TestMMCifInfo(unittest.TestCase): 'Created with SwissModel') self.assertEquals(i.GetStructDetails().GetModelTypeDetails(), 'Average') + def test_mmcifinfo_obsolete(self): + obs = io.MMCifInfoObsolete() + obs.SetDate('2011-08-31') + obs.SetID('SPRSDE') + obs.SetPDBID('1FOO') + obs.SetReplacedPDBID('2BAR') + self.assertEquals(obs.GetDate(), '2011-08-31') + self.assertEquals(obs.GetID(), 'Supersede') + self.assertEquals(obs.GetPDBID(), '1FOO') + self.assertEquals(obs.GetReplacedPDBID(), '2BAR') + + i = io.MMCifInfo() + obs.id = 'OBSLTE' + i.SetObsoleteInfo(obs) + self.assertEquals(i.GetObsoleteInfo().GetDate(), '2011-08-31') + self.assertEquals(i.GetObsoleteInfo().GetID(), 'Obsolete') + self.assertEquals(i.GetObsoleteInfo().GetPDBID(), '1FOO') + self.assertEquals(i.GetObsoleteInfo().GetReplacedPDBID(), 
'2BAR') + if __name__== '__main__': unittest.main() diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc index 2c0b7b3e9e3bd38db83c6c92a321fedef3fa1df2..23a28205ea74f2e15eb9cd793e16113d137da425 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -29,6 +29,28 @@ using namespace ost::io; BOOST_AUTO_TEST_SUITE( io ); +BOOST_AUTO_TEST_CASE(mmcif_info_obsolete) +{ + BOOST_MESSAGE(" Running mmcif_info_obsolete tests..."); + + MMCifInfoObsolete obs = MMCifInfoObsolete(); + + obs.SetDate("2011-08-31"); + obs.SetID(StringRef("OBSLTE", 6)); + obs.SetPDBID("1FOO"); + obs.SetReplacedPDBID("1BAR"); + + BOOST_CHECK(obs.GetDate() == "2011-08-31"); + BOOST_CHECK(obs.GetID() == "Obsolete"); + BOOST_CHECK(obs.GetPDBID() == "1FOO"); + BOOST_CHECK(obs.GetReplacedPDBID() == "1BAR"); + + obs.SetID(StringRef("SPRSDE", 6)); + BOOST_CHECK(obs.GetID() == "Supersede"); + + BOOST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_info_citation) { BOOST_MESSAGE(" Running mmcif_info_citation tests..."); diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index be7e35400e2ee43b8811cfb79a7731ef544aceec..e33f6ad6f97e19759ca57eeb964e7d1569afbb2a 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -1142,6 +1142,7 @@ BOOST_AUTO_TEST_CASE(mmcif_parseatomident) BOOST_MESSAGE(" done."); } +/* BOOST_AUTO_TEST_CASE(mmcif_parseandaddatom) { mol::EntityHandle eh = mol::CreateEntity(); @@ -1156,6 +1157,7 @@ BOOST_AUTO_TEST_CASE(mmcif_parseandaddatom) //BOOST_CHECK_THROW(tmmcif_p.ParseAndAddAtom(cols), IOException); //BOOST_MESSAGE(" done."); } +*/ BOOST_AUTO_TEST_CASE(mmcif_testreader) { @@ -1219,6 +1221,11 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader) BOOST_CHECK(sd.GetMassMethod() == "Good Guess"); BOOST_CHECK(sd.GetModelDetails() == "Even better guessing"); BOOST_CHECK(sd.GetModelTypeDetails() == "Guess"); + MMCifInfoObsolete obs = 
mmcif_p.GetInfo().GetObsoleteInfo(); + BOOST_CHECK(obs.GetDate() == "2011-08-31"); + BOOST_CHECK(obs.GetID() == "Obsolete"); + BOOST_CHECK(obs.GetPDBID() == "1FOO"); + BOOST_CHECK(obs.GetReplacedPDBID() == "2BAR"); BOOST_MESSAGE(" done."); BOOST_MESSAGE(" done."); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index 1cd5d1c357fe5a84db67d7fbafdae1231a3ae937..ce890cf9d3be305019cbc431726433df8a06beee 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -6,6 +6,11 @@ data_1BAR # this file is also used in the mmcif_mmcif_chaintype_setting test for a true # positive test, hence the entity category is not to be changed +_pdbx_database_PDB_obs_spr.id OBSLTE +_pdbx_database_PDB_obs_spr.date 2011-08-31 +_pdbx_database_PDB_obs_spr.pdb_id 1FOO +_pdbx_database_PDB_obs_spr.replace_pdb_id 2BAR + loop_ _entity.id _entity.type