diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst index e172b34a3a7d393adf230dbd0444ec29a691981b..38934cc7943df2c6d1cec5f40dd43c34485368e6 100644 --- a/modules/io/doc/mmcif.rst +++ b/modules/io/doc/mmcif.rst @@ -160,6 +160,42 @@ of the annotation available. .. method:: GetStructDetails() + .. method:: AddMMCifPDBChainTr(cif_chain_id, pdb_chain_id) + + Set up a translation for a certain mmCIF chain name to the traditional PDB + chain name. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + + .. method:: GetMMCifPDBChainTr(cif_chain_id) + + Get the translation of a certain mmCIF chain name to the traditional PDB + chain name. + + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + :returns: atom_site.auth_asym_id as :class:`str` + + .. method:: AddPDBMMCifChainTr(pdb_chain_id, cif_chain_id) + + Set up a translation for a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :param cif_chain_id: atom_site.label_asym_id + :type cif_chain_id: :class:`str` + + .. method:: GetPDBMMCifChainTr(pdb_chain_id) + + Get the translation of a certain PDB chain name to the mmCIF chain name. + + :param pdb_chain_id: atom_site.auth_asym_id + :type pdb_chain_id: :class:`str` + :returns: atom_site.label_asym_id as :class:`str` + .. class:: MMCifInfoCitation This stores citation information from an input file. @@ -794,4 +830,5 @@ of the annotation available. .. LocalWords: cas isbn pubmed asu seqres conop ConnectAll casp COMPND OBSLTE .. LocalWords: SPRSDE pdb func autofunction exptl attr pdbx oper conf spr dif -.. LocalWords: biounits biounit uniprot UNP seqs +.. LocalWords: biounits biounit uniprot UNP seqs AddMMCifPDBChainTr cif asym +..
LocalWords: auth GetMMCifPDBChainTr AddPDBMMCifChainTr GetPDBMMCifChainTr diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 6654393be01fefaefa346b9f86bcdef5f63e9c7e..18ec3713b0eaabad5121727e1389fa87001782df 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -280,6 +280,10 @@ void export_mmcif_io() .def("GetStructDetails", &MMCifInfo::GetStructDetails) .def("SetObsoleteInfo", &MMCifInfo::SetObsoleteInfo) .def("GetObsoleteInfo", &MMCifInfo::GetObsoleteInfo) +.def("AddMMCifPDBChainTr", &MMCifInfo::AddMMCifPDBChainTr) + .def("GetMMCifPDBChainTr", &MMCifInfo::GetMMCifPDBChainTr) + .def("AddPDBMMCifChainTr", &MMCifInfo::AddPDBMMCifChainTr) + .def("GetPDBMMCifChainTr", &MMCifInfo::GetPDBMMCifChainTr) .add_property("citations", make_function(&MMCifInfo::GetCitations, return_value_policy<copy_const_reference>())) .add_property("biounits", make_function(&MMCifInfo::GetBioUnits, diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc index c8565a2e809ff1f84b182b9f73e4cacd1866c509..29f0db7463188068b3584a49457126d7ec1601d0 100644 --- a/modules/io/src/mol/mmcif_info.cc +++ b/modules/io/src/mol/mmcif_info.cc @@ -22,6 +22,42 @@ namespace ost { namespace io { +void MMCifInfo::AddMMCifPDBChainTr(String cif, String pdb) +{ + std::map<String, String>::iterator tr_it = cif_2_pdb_chain_id_.find(cif); + if (tr_it != cif_2_pdb_chain_id_.end()) { + throw IOException("mmCIF chain id '"+ cif +"' is already mapped to '"+ + tr_it->second+"'."); + } + cif_2_pdb_chain_id_.insert(std::pair<String, String>(cif, pdb)); +} + +String MMCifInfo::GetMMCifPDBChainTr(String cif) const +{ + std::map<String, String>::const_iterator tr_it = + cif_2_pdb_chain_id_.find(cif); + if (tr_it == cif_2_pdb_chain_id_.end()) { return ""; } + return tr_it->second; +} + +void MMCifInfo::AddPDBMMCifChainTr(String pdb, String cif) +{ + std::map<String, String>::iterator tr_it = pdb_2_cif_chain_id_.find(pdb); + if
(tr_it != pdb_2_cif_chain_id_.end()) { + throw IOException("PDB chain id '"+ pdb +"' is already mapped to '"+ + tr_it->second+"'."); + } + pdb_2_cif_chain_id_.insert(std::pair<String, String>(pdb, cif)); +} + +String MMCifInfo::GetPDBMMCifChainTr(String pdb) const +{ + std::map<String, String>::const_iterator tr_it = + pdb_2_cif_chain_id_.find(pdb); + if (tr_it == pdb_2_cif_chain_id_.end()) { return ""; } + return tr_it->second; +} + void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) { // find citation diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh index b240e5a1c86eedaddd76de2ba71bbe009b590f3d..3eebf54b2369947c9d007e616c4d33c2f968750c 100644 --- a/modules/io/src/mol/mmcif_info.hh +++ b/modules/io/src/mol/mmcif_info.hh @@ -22,6 +22,7 @@ #include <vector> #include <map> #include <boost/shared_ptr.hpp> +#include <ost/seq/sequence_list.hh> #include <ost/geom/geom.hh> #include <ost/string_ref.hh> #include <ost/io/module_config.hh> @@ -771,6 +772,30 @@ public: /// \return experiment resolution Real GetResolution() const { return resolution_; } + /// \brief Add a new mmCIF/ PDB chain name tuple. + /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \param pdb chain name as used in the PDB file (auth_asym_id) + void AddMMCifPDBChainTr(String cif, String pdb); + + /// \brief Get a PDB chain name for a CIF chain name + /// + /// \param cif chain name as used by the mmCIF file (label_asym_id) + /// \return chain name as used in the PDB file (auth_asym_id) + String GetMMCifPDBChainTr(String cif) const; + + /// \brief Add a new PDB/ mmCIF chain name tuple. 
+ /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \param cif chain name as used in the mmCIF file (label_asym_id) + void AddPDBMMCifChainTr(String pdb, String cif); + + /// \brief Get a CIF chain name for a PDB chain name + /// + /// \param pdb chain name as used by the PDB file (auth_asym_id) + /// \return chain name as used in the mmCIF file (label_asym_id) + String GetPDBMMCifChainTr(String pdb) const; + /// \brief Add a biounit /// /// \param bu biounit to be added @@ -848,6 +873,8 @@ private: std::vector<MMCifInfoBioUnit> biounits_; ///< list of biounits std::vector<MMCifInfoTransOpPtr> transops_; MMCifInfoStructRefs struct_refs_; + std::map<String, String> cif_2_pdb_chain_id_; + std::map<String, String> pdb_2_cif_chain_id_; }; diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index 2bcdee810c013971aec9f1c9f631320bbfe719b6..762939ca817800366ee487ffadef9da493f5103a 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -1539,6 +1539,7 @@ void MMCifReader::OnEndData() // process chain types std::vector<std::pair<mol::ChainHandle, String> >::const_iterator css; MMCifEntityDescMap::const_iterator edm_it; + String pdb_auth_chain_name; for (css = chain_id_pairs_.begin(); css != chain_id_pairs_.end(); ++css) { edm_it = entity_desc_map_.find(css->second); @@ -1548,6 +1549,9 @@ void MMCifReader::OnEndData() if (edm_it->second.seqres.length() > 0) { seqres_.AddSequence(seq::CreateSequence(css->first.GetName(), edm_it->second.seqres)); + pdb_auth_chain_name = css->first.GetStringProp("pdb_auth_chain_name"); + info_.AddMMCifPDBChainTr(css->first.GetName(), pdb_auth_chain_name); + info_.AddPDBMMCifChainTr(pdb_auth_chain_name, css->first.GetName()); } else if (edm_it->second.type!=mol::CHAINTYPE_WATER) { // mark everything that doesn't have SEQRES as ligand and isn't of type // water as ligand diff --git a/modules/io/tests/test_mmcif_info.cc
b/modules/io/tests/test_mmcif_info.cc index 3d26747976b618212b05d0416012ecc2b78f5fc5..65e4fdaecb42ec54fb50d3c579f24ba8b7ae8adb 100644 --- a/modules/io/tests/test_mmcif_info.cc +++ b/modules/io/tests/test_mmcif_info.cc @@ -202,7 +202,17 @@ BOOST_AUTO_TEST_CASE(mmcif_info) #else BOOST_CHECK_CLOSE(info.GetResolution(), 1.9f, 0.001f); #endif - + + info.AddMMCifPDBChainTr("A", "B"); + BOOST_CHECK_THROW(info.AddMMCifPDBChainTr("A", "B"), IOException); + BOOST_CHECK("B" == info.GetMMCifPDBChainTr("A")); + BOOST_CHECK("" == info.GetMMCifPDBChainTr("C")); + + info.AddPDBMMCifChainTr("A", "B"); + BOOST_CHECK_THROW(info.AddPDBMMCifChainTr("A", "B"), IOException); + BOOST_CHECK("B" == info.GetPDBMMCifChainTr("A")); + BOOST_CHECK("" == info.GetPDBMMCifChainTr("C")); + BOOST_MESSAGE(" done."); } diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index ce890cf9d3be305019cbc431726433df8a06beee..68b2fbb5769c6e735ec7e30cb7311f002f9b3346 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -168,10 +168,10 @@ ATOM C CB ILE A 13 . 1 21.236 34.463 16.492 1.00 22.67 . 13 21 ? A ATOM C CG1 ILE A 13 . 1 20.478 33.469 17.371 1.00 22.14 . 13 22 ? A ATOM C CG2 ILE A 13 . 1 21.357 33.986 15.016 1.00 21.75 . 13 23 ? A # - - - - data truncated for brevity - - - - -HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? A -HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? A -HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? A -HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? A +HETATM C C1 APS C 1 1 1 4.171 29.012 7.116 0.58 17.27 1 300 101 ? C +HETATM C C2 APS C 1 1 1 4.949 27.758 6.793 0.58 16.95 1 300 102 ? C +HETATM O O3 APS C 1 1 1 4.800 26.678 7.393 0.58 16.85 1 300 103 ? C +HETATM N N4 APS C 1 1 1 5.930 27.841 5.869 0.58 16.43 1 300 104 ? 
C # - - - - data truncated for brevity - - - - # chain to be ignored by 'restrict_chains' feature ATOM N N ILE Z 1 . 1 23.664 33.855 16.884 1.00 22.08 . 1 17 ? Z